# Replication script for the figures.
# NOTE(review): the working directory and the output root below are hard-coded
# Windows-style paths; edit both before running on another machine.
setwd("~\\Replication\\Replication_final")
# Output root; the figure blocks append "main figure/" to it when saving PDFs.
DIR<-"~\\Replication\\Replication_final\\Output\\"

# Setup ####

{
  # Packages used by the figure code.
  # Please install any that are not yet installed before running.
  # Switch the session locale to Chinese.
  # NOTE(review): "Chinese" looks like a Windows locale name -- confirm it is
  # accepted on the platform used for replication.
  Sys.setlocale(, "Chinese")
  library(readxl)
  library(tidyverse)
  library(showtext)
  library(stringr)
  library(rlist)
  library(haven)
  library(knitr)
  library(kableExtra)
  library(lubridate)
  library(stargazer)
  library(haven)  # NOTE(review): haven is already attached above; this repeat is redundant
  library(DescTools)
  library(cowplot)
  library(RColorBrewer)
  library(foreign)
}
# Figures ####

## Figure 1. Distribution of Headquarters Location, Investment Region, and Investment Industry (by Government Ownership) ####
{
# Load the Stata extracts used for Figure 1. All paths are relative to the
# working directory set at the top of the script.
# gp1519 / lp1519: GP and LP summary-statistics files for 2015-19 (foreign
# entities excluded, per the file names).
gp1519<-read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta")
lp1519<-read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta")
# Investment-industry tables for GPs and LPs.
# NOTE(review): presumably one row per entity x industry -- confirm against the data.
gp.industry<-read_dta("Data\\Zero2IPO data used for analysis\\gpindustry.dta")
lp.industry<-read_dta("Data\\Zero2IPO data used for analysis\\lpindustry.dta")
# Investment-region tables for GPs and LPs (same caveat as above).
gp.region<-read_dta("Data\\Zero2IPO data used for analysis\\gpregion.dta")
lp.region<-read_dta("Data\\Zero2IPO data used for analysis\\lpregion.dta")
}  
  {
    # Recode a 0/1 government-ownership flag into "Yes"/"No" and rename the
    # column to "Government", so that the GP and LP tables expose the same
    # column name to the plotting code.
    #   df:  data frame holding the flag column
    #   col: name of the flag column ("govgp" or "govlp")
    # Returns `df` with the flag stored as character and renamed in place
    # (column position is unchanged). Values other than "1"/"0", including
    # NA, are left as they are.
    label_government <- function(df, col) {
      flag <- as.character(df[[col]])
      flag[which(flag == "1")] <- "Yes"
      flag[which(flag == "0")] <- "No"
      df[[col]] <- flag
      colnames(df)[which(colnames(df) == col)] <- "Government"
      df
    }

    # GP-side tables carry the flag as `govgp`, LP-side tables as `govlp`.
    gp1519      <- label_government(gp1519, "govgp")
    lp1519      <- label_government(lp1519, "govlp")
    gp.industry <- label_government(gp.industry, "govgp")
    lp.industry <- label_government(lp.industry, "govlp")
    gp.region   <- label_government(gp.region, "govgp")
    lp.region   <- label_government(lp.region, "govlp")
  }
  
  {
    # Keep entities headquartered in China only. which() also drops rows whose
    # `coastal` value is missing.
    gp1519 <- gp1519[which(gp1519$coastal != "Foreign Countries"), ]
    lp1519 <- lp1519[which(lp1519$coastal != "Foreign Countries"), ]

    # Restrict the region / industry tables to the retained entities. The
    # LP-side tables also keep GPs that are flagged as LP respondents.
    gp.region <- gp.region[which(gp.region$gp_fullname %in% gp1519$gp_fullname), ]
    lp.region <- lp.region[which(
      lp.region$lp_fullname %in% lp1519$lp_fullname |
        lp.region$lp_fullname %in% gp1519$gp_fullname[which(gp1519$lp_respondent == 1)]
    ), ]
    gp.industry <- gp.industry[which(gp.industry$gp_fullname %in% gp1519$gp_fullname), ]
    lp.industry <- lp.industry[which(
      lp.industry$lp_fullname %in% lp1519$lp_fullname |
        lp.industry$lp_fullname %in% gp1519$gp_fullname[which(gp1519$lp_respondent == 1)]
    ), ]

    # Overall percentage shares. Each table is counted by category first and
    # the counts are then divided by their total, which equals the number of
    # rows that entered the count.

    # GP: headquarters region, investment region, investment industry.
    gp.hq <- gp1519 %>%
      group_by(coastal) %>%
      summarise(share = n()) %>%
      mutate(share = share / sum(share) * 100)
    gp.invregion <- gp.region[which(gp.region$gp_fullname %in% gp1519$gp_fullname &
                                      !is.na(gp.region$coastal)), ] %>%
      group_by(coastal) %>%
      summarise(share = n()) %>%
      mutate(share = share / sum(share) * 100)
    gp.ind <- gp.industry[which(gp.industry$gp_fullname %in% gp1519$gp_fullname &
                                  !is.na(gp.industry$industry_group)), ] %>%
      group_by(industry_group) %>%
      summarise(share = n()) %>%
      mutate(share = share / sum(share) * 100)

    # LP: same three distributions.
    lp.hq <- lp1519 %>%
      group_by(coastal) %>%
      summarise(share = n()) %>%
      mutate(share = share / sum(share) * 100)
    lp.invregion <- lp.region[which(lp.region$lp_fullname %in% lp1519$lp_fullname &
                                      !is.na(lp.region$coastal)), ] %>%
      group_by(coastal) %>%
      summarise(share = n()) %>%
      mutate(share = share / sum(share) * 100)
    lp.ind <- lp.industry[which(lp.industry$lp_fullname %in% lp1519$lp_fullname &
                                  !is.na(lp.industry$industry_group)), ] %>%
      group_by(industry_group) %>%
      summarise(share = n()) %>%
      mutate(share = share / sum(share) * 100)
  }

{
  # Percentage share of each category *within* each ownership group
  # (Government = "Yes" / "No"). Rows are counted per (Government, category)
  # and divided by the total count of their Government group.
  #
  # The earlier version obtained the group total through a self left_join()
  # without `by`, which joins on every shared column name (and would break if
  # a table already had a column called `n`). Normalising the counts inside
  # group_by(Government) gives the same shares without the join. Each result
  # stays grouped by Government, as before.

  gp.hq.bygov <- gp1519 %>%
    group_by(Government, coastal) %>%
    summarise(share = n()) %>%
    group_by(Government) %>%
    mutate(share = share / sum(share) * 100)
  #gp.hq.bygov[12,1]<-'1'
  #gp.hq.bygov[12,2]<-"Foreign Countries"
  #gp.hq.bygov[12,3]<-0

  # GP investment regions with a non-missing region group.
  gp.region.full <- gp.region[which(gp.region$gp_fullname %in% gp1519$gp_fullname &
                                      !is.na(gp.region$coastal)), ]

  gp.invregion.bygov <- gp.region.full %>%
    group_by(Government, coastal) %>%
    summarise(share = n()) %>%
    group_by(Government) %>%
    mutate(share = share / sum(share) * 100)

  # GP investment industries with a non-missing industry group.
  gp.ind.full <- gp.industry[which(gp.industry$gp_fullname %in% gp1519$gp_fullname &
                                     !is.na(gp.industry$industry_group)), ]

  gp.ind.bygov <- gp.ind.full %>%
    group_by(Government, industry_group) %>%
    summarise(share = n()) %>%
    group_by(Government) %>%
    mutate(share = share / sum(share) * 100)

  lp.hq.bygov <- lp1519 %>%
    group_by(Government, coastal) %>%
    summarise(share = n()) %>%
    group_by(Government) %>%
    mutate(share = share / sum(share) * 100)
  #lp.hq.bygov[12,1]<-'1'
  #lp.hq.bygov[12,2]<-"Foreign Countries"
  #lp.hq.bygov[12,3]<-0

  # LP investment regions with a non-missing region group.
  lp.region.full <- lp.region[which(lp.region$lp_fullname %in% lp1519$lp_fullname &
                                      !is.na(lp.region$coastal)), ]

  lp.invregion.bygov <- lp.region.full %>%
    group_by(Government, coastal) %>%
    summarise(share = n()) %>%
    group_by(Government) %>%
    mutate(share = share / sum(share) * 100)

  # LP investment industries with a non-missing industry group.
  lp.ind.full <- lp.industry[which(lp.industry$lp_fullname %in% lp1519$lp_fullname &
                                     !is.na(lp.industry$industry_group)), ]

  lp.ind.bygov <- lp.ind.full %>%
    group_by(Government, industry_group) %>%
    summarise(share = n()) %>%
    group_by(Government) %>%
    mutate(share = share / sum(share) * 100)
}

{
  # Build one Figure 1 panel: a horizontal, dodged bar chart of percentage
  # shares with one bar per ownership group.
  #   shares:      data frame with the columns referenced in `mapping`
  #   mapping:     aes() mapping (category on x, `share` as weight,
  #                `Government` as fill)
  #   axis.label:  title of the category axis
  #   panel.title: plot title
  #   grid.max:    upper end of the gray reference lines, drawn every 5 points
  #   label.text:  element_text() used for the category labels
  # Returns the ggplot object.
  plot_share_by_government <- function(shares, mapping, axis.label, panel.title,
                                       grid.max, label.text) {
    ggplot(shares, mapping) +
      geom_hline(yintercept = seq(0, grid.max, 5), color = "gray") +
      geom_bar(color = "black", width = .7, position = "dodge") +
      coord_flip() +
      theme_classic() +
      ylab("Share (%)") +
      xlab(axis.label) +
      scale_fill_brewer(palette = "Set1") +
      labs(title = panel.title) +
      theme(axis.text.y = label.text)
  }

  # GP panels.
  p11 <- plot_share_by_government(
    gp.hq.bygov, aes(coastal, weight = share, fill = Government),
    "Region Group", "GP: Headquarters", 30,
    element_text(angle = 20, hjust = 1, vjust = 0)
  )
  p12 <- plot_share_by_government(
    gp.invregion.bygov, aes(coastal, weight = share, fill = Government),
    "Region Group", "GP: Investment Region", 30,
    element_text(angle = 20, hjust = 1, vjust = 0)
  )
  # Industry panels use smaller, top-aligned labels and a grid up to 35%.
  p13 <- plot_share_by_government(
    gp.ind.bygov, aes(industry_group, weight = share, fill = Government),
    "Industry Group", "GP: Investment Industry", 35,
    element_text(angle = 20, hjust = 1, vjust = 1, size = rel(0.8))
  )

  # LP panels.
  p14 <- plot_share_by_government(
    lp.hq.bygov, aes(coastal, weight = share, fill = Government),
    "Region Group", "LP: Headquarters", 30,
    element_text(angle = 20, hjust = 1, vjust = 0)
  )
  p15 <- plot_share_by_government(
    lp.invregion.bygov, aes(coastal, weight = share, fill = Government),
    "Region Group", "LP: Investment Region", 30,
    element_text(angle = 20, hjust = 1, vjust = 0)
  )
  p16 <- plot_share_by_government(
    lp.ind.bygov, aes(industry_group, weight = share, fill = Government),
    "Industry Group", "LP: Investment Industry", 35,
    element_text(angle = 20, hjust = 1, vjust = 1, size = rel(0.8))
  )

  # LP panels form the top row (A-C), GP panels the bottom row (D-F).
  phigh1 <- plot_grid(p14, p15, p16, p11, p12, p13,
                      labels = "AUTO",
                      nrow = 2)

  ggsave2(phigh1, filename = "Figure1.pdf",
          path = paste0(DIR, "main figure/"),
          width = 14,
          height = 7)
}

## Figure 2. GP Dislike for LPs with Government Ties: Heterogeneity by Investment Sectors ####
{
  # Packages for the Figure 2 analysis (some are already attached above).
  library(tidyverse)
  library(stringr)
  library(rlist)
  library(haven)
  library(grf)  # regression_forest(), causal_forest(), average_treatment_effect()
  library(sandwich)
  library(lmtest)
  # NOTE(review): Hmisc is attached after tidyverse and may mask some dplyr
  # names (e.g. summarize) -- the code below uses summarise(); confirm.
  library(Hmisc)
  library(ggplot2)
  library(stargazer)
  # Fix the RNG seed; the following data-preparation blocks set it again.
  set.seed(2022)
 
}

{
  set.seed(2022)
  # Loads `gpsurveyML`, which is referenced below.
  load("Data\\Zero2IPO data used for analysis\\gpsurveyMLdataset.rdata")

  industry <- readxl::read_xlsx("Data\\Zero2IPO data used for analysis\\ActiveOwnership_GP_investment_industry_imput.xlsx")

  # Keep surveyed GPs with a non-empty industry group (which() also drops
  # rows whose industry group is NA).
  industry <- industry[which(industry$gp_fullname %in% gpsurveyML$gp_fullname &
                               industry$industry_group != ""), ]

  # Total number of investments per GP x industry group, then flag the
  # group(s) with the largest total within each GP.
  industry <- industry %>%
    group_by(gp_fullname, industry_group) %>%
    summarise(n = sum(num_invest, na.rm = TRUE)) %>%
    group_by(gp_fullname) %>%
    mutate(mostinvest = as.numeric(n == max(n, na.rm = TRUE)))

  # One top industry per GP; ties are broken by a random draw.
  industry.mostin <- industry[which(industry$mostinvest == 1), ] %>%
    group_by(gp_fullname) %>%
    summarise(industry_group = industry_group[sample.int(n(), 1)])

  # Integer code per industry group, in order of first appearance.
  industrycode <- data.frame(
    industry_group = unique(industry.mostin$industry_group)
  )
  industrycode$code <- 1:nrow(industrycode)

  industry.mostin <- industry.mostin %>% left_join(industrycode)
}

# above50%-focus GP ####
# A GP is assigned to an industry group only if more than 50% of its
# investments fall in that group. At most one group per GP can exceed 50%, so
# no random tie-breaking is needed here.

{
  set.seed(2022)
  # Loads `gpsurveyML` (reloaded so this block starts from the saved dataset).
  load("Data\\Zero2IPO data used for analysis\\gpsurveyMLdataset.rdata")
  
  industry.above50 = 
    readxl::read_xlsx("Data\\Zero2IPO data used for analysis\\ActiveOwnership_GP_investment_industry_imput.xlsx")
  
  # Keep surveyed GPs with a non-empty industry group.
  industry.above50 = industry.above50[which(industry.above50$gp_fullname%in%gpsurveyML$gp_fullname),]
  industry.above50 = industry.above50[which(industry.above50$industry_group!=""),]
  # Investment count per GP x industry group and its share of the GP's total.
  industry.above50 = industry.above50 %>% group_by(gp_fullname, industry_group) %>% 
    summarise(times=sum(num_invest,na.rm = T)) %>%
    group_by(gp_fullname) %>%
    mutate(share = times/sum(times))
  #industry.above75 = industry.above50[which(industry.above50$share>.75),]         
  # Keep only the GP x industry rows whose share exceeds 50%.
  industry.above50 = industry.above50[which(industry.above50$share>.5),]
  
  # NOTE(review): the figures below are presumably the number of GPs retained
  # at each share threshold -- confirm.
  # 75: 93
  # 60: 114
  # >50:236
  
  # Integer code per industry group, in alphabetical order of the group name.
  # This overwrites the `industrycode` table built in the previous block.
  industrycode = data.frame(
    industry_group = sort(unique(industry.above50$industry_group)),
    code = 1:length(unique(industry.above50$industry_group))
  )
  industry.above50 = industry.above50%>%left_join(industrycode)
  # One 0/1 indicator column per industry code, collapsed to one row per GP.
  # NOTE(review): the number of codes (13) is hard-coded; later code selects
  # columns by position, so keep the column set and order unchanged.
  dummy = industry.above50 %>%
    mutate(dummy1 = ifelse(code==1,1,0),
           dummy2 = ifelse(code==2,1,0),
           dummy3 = ifelse(code==3,1,0),
           dummy4 = ifelse(code==4,1,0),
           dummy5 = ifelse(code==5,1,0),
           dummy6 = ifelse(code==6,1,0),
           dummy7 = ifelse(code==7,1,0),
           dummy8 = ifelse(code==8,1,0),
           dummy9 = ifelse(code==9,1,0),
           dummy10 = ifelse(code==10,1,0),
           dummy11 = ifelse(code==11,1,0),
           dummy12 = ifelse(code==12,1,0),
           dummy13 = ifelse(code==13,1,0)) %>% group_by(gp_fullname) %>%
    summarise(dummy1 = sum(dummy1),
              dummy2 = sum(dummy2),
              dummy3 = sum(dummy3),
              dummy4 = sum(dummy4),
              dummy5 = sum(dummy5),
              dummy6 = sum(dummy6),
              dummy7 = sum(dummy7),
              dummy8 = sum(dummy8),
              dummy9 = sum(dummy9),
              dummy10 = sum(dummy10),
              dummy11 = sum(dummy11),
              dummy12 = sum(dummy12),
              dummy13 = sum(dummy13))
}


## all gp ####
# Fit a clustered causal forest of the survey outcome `q2` on the treatment
# `lp_relation_gov`, using the above-50%-focus sample of GP respondents.

{
  # drop NA in q2
  gpsurveyML <- gpsurveyML[which(!is.na(gpsurveyML$q2)),]
  # Attach the industry dummies (join key is chosen implicitly from the shared
  # column names) and keep only GPs that have a >50% focus industry.
  gpsurveyML.above50 = gpsurveyML %>% left_join(dummy)
  gpsurveyML.above50 = gpsurveyML.above50[which(gpsurveyML.above50$gp_fullname%in%industry.above50$gp_fullname),]
  # Covariate matrix: drop non-covariate columns by position.
  # NOTE(review): positional selection depends on the exact column order of
  # the saved dataset plus the joined dummies -- verify after any data change.
  X.raw = gpsurveyML.above50[,-c(1:4,6:18,19,20:21,23:24,27,32,36,37,41:48)]
  # Cluster identifier (GP fixed-effect id) as a plain numeric vector.
  gp.id = as.numeric(gpsurveyML.above50$gp_fe[1:length(gpsurveyML.above50$gp_fe)])
  
  # use soeshare68 and 4digit-level strategic industry (at the same time)
  colnames(X.raw)
  X = X.raw
  # Turn the government share into an indicator: 1 if reported, 0 if missing.
  X$gp_total_gov_share = ifelse(is.na(X$gp_total_gov_share),0,1)
  
  # Treatment W is removed from the covariates; outcome Y is survey item q2.
  W = X$lp_relation_gov
  X = X[,-c(which(colnames(X)=="lp_relation_gov"))]
  Y = gpsurveyML.above50$q2
  
  # Coerce every covariate to a plain numeric column.
  X<- as.data.frame(
    sapply(X, function(x){
      x <- as.numeric(x[1:length(x)])
    })
  )
  
  # Nuisance models: forest predictions of the outcome and of the treatment,
  # clustered by GP with equal weight per cluster.
  Y = as.vector(Y)
  Y.forest = regression_forest(X, Y, clusters = gp.id, equalize.cluster.weights = TRUE)
  Y.hat = predict(Y.forest)$predictions
  W = as.vector(W)
  W.forest = regression_forest(X, W, clusters = gp.id, equalize.cluster.weights = TRUE)
  W.hat = predict(W.forest)$predictions
  
  # Pilot causal forest on all covariates, used only to rank the covariates;
  # keep those with above-average variable importance.
  cf.raw = causal_forest(X, Y, W,
                         Y.hat = Y.hat, W.hat = W.hat, clusters = gp.id, equalize.cluster.weights = TRUE)
  varimp = variable_importance(cf.raw)
  selected.idx = which(varimp > mean(varimp))
  
  # Final causal forest on the selected covariates with parameter tuning.
  cf = causal_forest(as.data.frame(X[,selected.idx]), Y, W,
                     Y.hat = Y.hat, W.hat = W.hat,
                     clusters = gp.id,
                     equalize.cluster.weights = TRUE,
                     tune.parameters = "all")
  tau.hat = predict(cf)$predictions
  
  
  #
  # Estimate ATE
  #
  
  ATE = average_treatment_effect(cf)
  # NOTE(review): these paste() results are neither assigned nor printed, so
  # they are only shown when the lines are run on their own.
  paste("95% CI for the ATE:", round(ATE[1], 3),
        "+/-", round(qnorm(0.975) * ATE[2], 3))
  paste("99% CI for the ATE:", round(ATE[1], 3),
        "+/-", round(qnorm(0.995) * ATE[2], 3))
  
  if(F){# disabled: run this chunk only if the ATE above is not successfully identified
    ATE = average_treatment_effect(cf, target.sample = "treated")
    paste("95% CI for the ATE:", round(ATE[1], 3),
          "+/-", round(qnorm(0.975) * ATE[2], 3))
    paste("99% CI for the ATE:", round(ATE[1], 3),
          "+/-", round(qnorm(0.995) * ATE[2], 3))
  }
}

{
  # Cluster (GP) indicator matrix and cluster sizes.
  data.all <- cbind(X, data.frame(gp_id = as.factor(gp.id)))
  gp.mat <- model.matrix(~ gp_id + 0, data = data.all)
  gp.size <- colSums(gp.mat)

  # Observation-level scores built from the forest's outcome and treatment
  # predictions and the estimated effects, then averaged within each GP.
  dr.score <- tau.hat + W / cf$W.hat *
    (Y - cf$Y.hat - (1 - cf$W.hat) * tau.hat) -
    (1 - W) / (1 - cf$W.hat) * (Y - cf$Y.hat + cf$W.hat * tau.hat)
  gp.score <- t(gp.mat) %*% dr.score / gp.size

  # Diagnostic: mean score among observations in industry code 2. The dummy
  # is numeric 0/1, so it must be converted to logical before indexing;
  # indexing with the raw 0/1 values would return the first score repeatedly.
  mean(dr.score[as.logical(X$dummy2)], na.rm = TRUE)

  # Treatment effect on the treated within each industry subsample, for
  # industry codes 2-9. Codes 1 and 10-13 are left out, as before.
  # NOTE(review): presumably those groups are too small -- confirm.
  # Each subgroup is estimated once and both the estimate and its standard
  # error are taken from that single result.
  sector.codes <- 2:9
  sector.ate <- lapply(sector.codes, function(k) {
    average_treatment_effect(cf,
                             subset = as.logical(X[[paste0("dummy", k)]]),
                             target.sample = "treated")
  })

  CATE.raw <- data.frame(
    industry = sort(industrycode$industry_group[sector.codes]),
    CATE = vapply(sector.ate, function(est) est[[1]], numeric(1)),
    std.err = vapply(sector.ate, function(est) est[[2]], numeric(1))
  )

  # 95% interval; the half-width is rounded to three decimals before it is
  # added to / subtracted from the estimate.
  CATE <- CATE.raw %>% arrange(CATE) %>%
    mutate(
      upper = CATE + round(qnorm(0.975) * std.err, 3),
      lower = CATE - round(qnorm(0.975) * std.err, 3)
    )
  CATE$industry[which(CATE$industry == "Mining")] <- "Energy and Mineral"
  # Prefix "01", "02", ... so the axis keeps the order sorted by CATE.
  CATE <- CATE %>%
    mutate(name = paste0(str_sub(100 + seq_len(nrow(CATE)), -2, -1), " ", industry))

  # Point estimates with 95% intervals, one row per industry.
  p <- ggplot() +
    geom_hline(yintercept = 0) +
    geom_segment(aes(x = name, y = lower, xend = name, yend = upper), color = "grey",
                 data = CATE) +
    geom_point(aes(x = name, y = upper), data = CATE, shape = "|", size = 4, color = "grey") +
    geom_point(aes(x = name, y = lower), data = CATE, shape = "|", size = 4, color = "grey") +
    geom_point(aes(x = name, y = CATE), data = CATE, shape = 18, size = 2.5, color = "#191970") +
    theme(text = element_text(size = 10)) +
    ylab("CATE") +
    xlab("") +
    coord_flip()

  # Titled preview for interactive use; the saved figure has no title.
  p + labs(title = "Conditional Average Treatment Effect with 95% CI",
           subtitle = "Above50%-focus Sample: All GP Respondents")

  # save
  ggsave(filename = "Figure2.pdf",
         plot = p,
         path = paste0(DIR, "main figure/"),
         width = 8, height = 4)

  CATEall.above50 <- CATE
}

## Figure 3. GP Dislike for LPs with Government Ties: Heterogeneity by Investment Sectors ####
# NOTE(review): this heading repeats the Figure 2 title. The code under it
# plots the main disadvantages / advantages of government-related funds
# (panels "A: Main Disadvantages" and "B: Main Advantages") -- confirm the
# intended title.
{
  # packages
  library(tidyverse)
  library(stringr)
  library(readxl)
  library(haven)
  library(knitr)
  library(reshape2)
  library(cowplot)
  library(RColorBrewer)
  Sys.setlocale(,'Chinese')
  
  # The working directory is set at the top of the script; change it there for replication.
  # The raw survey files are read relative to that directory, so copy or move them there if needed.
 
}

{
  # Load the raw data of the two 2021 survey rounds.
  # The raw files must sit at the relative paths below for replication.
  survey1 <- read_excel('Data\\2021 survey data analysis on pros and cons\\2021new_survey_first_round.xlsx')
  # Rename the 13 rating columns: one1-one6, two1-two6, three1.
  colnames(survey1)[5:17] <- c('one1','one2','one3','one4','one5','one6',
                               'two1','two2','two3','two4','two5','two6',
                               'three1')
  # Second round: columns 10 and 17 are set aside (kept in survey2.2 below);
  # the remaining columns then get the same names as in the first round.
  survey2 <- read_excel('Data\\2021 survey data analysis on pros and cons\\2021new_survey_second_round.xlsx')
  survey2.1 <- survey2[,-c(10,17)]
  colnames(survey2.1)[5:17] <- c('one1','one2','one3','one4','one5','one6',
                                 'two1','two2','two3','two4','two5','two6',
                                 'three1')
  
  # Stack both rounds.
  survey <- rbind(survey1,survey2.1)
  
  # The two second-round-only questions, keeping respondents who answered at
  # least one of them. Column names translate roughly to "Of the above
  # advantages of government-related funds, which is relatively more
  # important?" and "Of the above aspects in which government-related funds
  # should improve, which is relatively more important?".
  survey2.2 <- survey2[,c(1,10,17)]
  survey2.2 <- survey2.2[which(!is.na(survey2.2$`以上政府相关资金的优势，哪个相对重要？`)|
                                 !is.na(survey2.2$`以上政府相关资金提升的方面，哪个相对重要？`)),]
  
  # load the info of respondents
  # ownership info
  gp.info.own<-read_dta('Data\\Zero2IPO data used for analysis\\gp_ownershipsample_info.dta')
  # id info: distinct rows of the first two columns of the regression dataset
  # NOTE(review): presumably the GP id and GP full name -- confirm.
  gp.id <- unique(read_dta("Data\\survey data used for analysis\\gp_survey_data_regress.dta")[,1:2])
  # the two survey rounds have no overlap
  # all surveys cover only our respondents
  
  
  # merge the new survey with id (join key chosen implicitly from shared column names)
  survey <- survey %>% left_join(gp.id)
  # merge the new survey with ownership info (columns 1 and 17 of gp.info.own)
  survey <- survey %>% left_join(gp.info.own[,c(1,17)])
  # Government indicator: 1 if a government share is recorded, 0 if missing.
  survey$gp_total_gov_share <- 
    ifelse(!is.na(survey$gp_total_gov_share),1,0)
  # Column 19 is renamed to 'gov'.
  # NOTE(review): assumes column 19 is gp_total_gov_share -- positional, confirm.
  colnames(survey)[19] <- 'gov'
  survey$gov <- as.character(survey$gov)
  #survey <- survey[which(survey$gp_id!='TRC'),]
  #survey$one2[which(survey$one2==0)] <- NA
  # Same merges and indicator for the second-round-only questions.
  survey2.2 <- survey2.2 %>% left_join(gp.id)
  survey2.2 <- survey2.2 %>% left_join(gp.info.own[,c(1,17)])
  survey2.2$gp_total_gov_share <- 
    ifelse(!is.na(survey2.2$gp_total_gov_share),1,0)
  colnames(survey2.2)[5] <- 'gov'
  survey2.2$gov <- as.character(survey2.2$gov)
  
  # impute all 0 scores with 1, because the answer options are limited to 1-10
  for(i in c(5:9,11:15,17)){
    survey[which(survey[,i]==0),i] <- 1
  }
}

{
  # Keep column 1 (NOTE(review): presumably the respondent id -- confirm),
  # the first five ratings of question set 1 (one1-one5), the first five
  # ratings of question set 2 (two1-two5), and the government indicator.
  survey <- survey[, c(1, 5:9, 11:15, 19)]

  # Top-3 and bottom-3 *distinct* scores each respondent gave within each
  # question set. Columns are created as numeric NA, in this fixed order,
  # because later code selects them by position.
  survey <- survey %>% mutate(
    HighestScore1 = NA_real_,
    SecondScore1 = NA_real_,
    ThirdScore1 = NA_real_,
    HighestScore2 = NA_real_,
    SecondScore2 = NA_real_,
    ThirdScore2 = NA_real_,
    LowestScore1 = NA_real_,
    Low2Score1 = NA_real_,
    Low3Score1 = NA_real_,
    LowestScore2 = NA_real_,
    Low2Score2 = NA_real_,
    Low3Score2 = NA_real_
  )

  # seq_len() keeps the loop from running when `survey` has no rows.
  for (i in seq_len(nrow(survey))) {
    # Distinct scores in ascending order (sort() drops NA); the descending
    # order is simply the reverse.
    set1.asc <- sort(unique(as.numeric(survey[i, 2:6])))
    set2.asc <- sort(unique(as.numeric(survey[i, 7:11])))
    set1.desc <- rev(set1.asc)
    set2.desc <- rev(set2.asc)

    # Indexing past the end of a vector yields NA, so respondents with fewer
    # than two or three distinct scores get NA without explicit checks.

    # high
    survey$HighestScore1[i] <- set1.desc[1]
    survey$SecondScore1[i] <- set1.desc[2]
    survey$ThirdScore1[i] <- set1.desc[3]
    survey$HighestScore2[i] <- set2.desc[1]
    survey$SecondScore2[i] <- set2.desc[2]
    survey$ThirdScore2[i] <- set2.desc[3]

    # low
    survey$LowestScore1[i] <- set1.asc[1]
    survey$Low2Score1[i] <- set1.asc[2]
    survey$Low3Score1[i] <- set1.asc[3]
    survey$LowestScore2[i] <- set2.asc[1]
    survey$Low2Score2[i] <- set2.asc[2]
    survey$Low3Score2[i] <- set2.asc[3]
  }

}



{
  # Build, for every respondent, the single most important advantage
  # (`topadv`) and disadvantage (`topdis`) of government-related funds, then
  # compute their percentage shares overall and by government indicator.
  
  # Seed for the random tie-breaking below; the order of the sample() calls
  # determines the result, so do not reorder the loop.
  set.seed(7)
  # Respondents without a direct answer in survey2.2 (plus "CLY"). Columns
  # kept by position: 1:12 (id, ratings, gov), 13 and 16.
  # NOTE(review): 13 and 16 are presumably HighestScore1 and HighestScore2,
  # given the column order created in the previous block -- confirm.
  survey2.2plus <- survey[which(!survey$gp_id%in%survey2.2$gp_id | survey$gp_id=="CLY"),c(1:13,16)]
  survey2.2plus <- 
    survey2.2plus %>% mutate(topadv = NA, topdis = NA)
  
  survey2.2plus$topadv <- as.numeric(survey2.2plus$topadv)
  survey2.2plus$topdis <- as.numeric(survey2.2plus$topdis)
  for(i in 1:nrow(survey2.2plus)){
    topadv <- c()
    topdis <- c()
    # Items of question set 1 (columns 2:6) whose rating equals column 13.
    for(j in 2:6){
      if(survey2.2plus[i,j]==survey2.2plus[i,13]){
        topadv <- c(topadv,j-1)
      }
    }
    # A single candidate is assigned directly: sample() on a length-one
    # number would draw from 1:x instead. Ties are broken at random.
    if(length(topadv)==1){
      survey2.2plus[i,15] <- topadv
    }else{
      survey2.2plus[i,15] <- sample(topadv,1,replace = F)
    }
    
    
    # Items of question set 2 (columns 7:11) whose rating equals column 14.
    for(k in 7:11){
      if(survey2.2plus[i,k]==survey2.2plus[i,14]){
        topdis <- c(topdis,k-6)
      }
    }
    
    if(length(topdis)==1){
      survey2.2plus[i,16] <- topdis
    }else{
      survey2.2plus[i,16] <- sample(topdis,1,replace = F)
    }
    
    
  }
  
  # Directly answered choices: rename the two question columns and store them
  # as character.
  colnames(survey2.2)[2:3] <- c("topadv","topdis")
  survey2.2$topadv <- as.character(survey2.2$topadv)
  survey2.2$topdis <- as.character(survey2.2$topdis)
  
  # Stack direct answers (excluding "CLY", which is taken from the imputed
  # rows) with the imputed choices; columns are id, topadv, topdis, gov.
  survey2.2full <- rbind(survey2.2[survey2.2$gp_id!="CLY",c(1:3,5)], survey2.2plus[,c(1,15,16,12)])
  
  # Overall percentage share of each choice among non-missing answers.
  topadv <- survey2.2full[!is.na(survey2.2full$topadv),] %>% 
    mutate(n = nrow(survey2.2full[!is.na(survey2.2full$topadv),])) %>%
    group_by(topadv) %>%
    summarise(share = 100*n()/n[1])
  topdis <- survey2.2full[!is.na(survey2.2full$topdis),] %>% 
    mutate(n = nrow(survey2.2full[!is.na(survey2.2full$topdis),])) %>%
    group_by(topdis) %>%
    summarise(share = 100*n()/n[1])  
  
  # Percentage share of each choice within each value of `gov`.
  topadv.bygov <- survey2.2full[!is.na(survey2.2full$topadv),] %>%
    group_by(gov,topadv) %>% summarise(freq = n()) %>%
    group_by(gov) %>%
    mutate(n = sum(freq)) %>%
    mutate(share = freq/n*100)
  
  topdis.bygov <- survey2.2full[!is.na(survey2.2full$topdis),] %>%
    group_by(gov,topdis) %>% summarise(freq = n()) %>%
    group_by(gov) %>%
    mutate(n = sum(freq)) %>%
    mutate(share = freq/n*100)
  
  
  topadv <- topadv %>% arrange(topadv)
  topdis <- topdis %>% arrange(topdis)
  
  # Replace the numeric codes with display labels, by position.
  # NOTE(review): this assumes exactly five categories are present and sorted
  # 1..5; a missing category would make the assignment fail or mislabel rows.
  topadv$topadv <- c("1. Regulatory Approvals and Tax Reductions", 
                     "2. Reduce Fundraising Pressure", 
                     "3. Access to Information", 
                     "4. Obtain Local Government Support", 
                     "5. Attract Potential Investors")
  topdis$topdis <- c("1. Investment Interference", 
                     "2. No Risk Tolerance", 
                     "3. Short Investment Horizon", 
                     "4. Lack of Professional Team", 
                     "5. Exposure to Policy Uncertainty")
}

{
  # Layers shared by both Figure 3 panels: reference lines every 5 points,
  # horizontal bars, classic theme, and a blank title line that leaves room
  # for the panel label.
  fig3.layers <- list(
    geom_hline(yintercept = seq(5, 25, 5), color = "gray"),
    geom_bar(color = "black", width = .7, position = "stack"),
    coord_flip(),
    theme_classic(),
    ylab("Share (%)"),
    xlab(""),
    scale_fill_brewer(palette = "Set3"),
    labs(title = "\n")
  )

  # Main advantages (slightly smaller category labels).
  p31 <- ggplot(topadv, aes(topadv, weight = share)) +
    fig3.layers +
    theme(axis.text.y = element_text(angle = 20, hjust = 1, vjust = 0,
                                     size = rel(0.87)))

  # Main disadvantages.
  p32 <- ggplot(topdis, aes(topdis, weight = share)) +
    fig3.layers +
    theme(axis.text.y = element_text(angle = 20, hjust = 1, vjust = 0))

  # Disadvantages on top (A), advantages below (B).
  phigh3 <- plot_grid(
    p32, p31,
    #labels = "AUTO",
    labels = c("A: Main Disadvantages", "B: Main Advantages"),
    nrow = 2,
    align = "v"
  )

  ggsave2(phigh3,
          filename = "Figure3.pdf",
          path = paste(DIR, "main figure/", sep = ""),
          width = 9,
          height = 9)
}
## Figure A1. Distribution of Headquarters Location, Investment Region, and Investment Industry (Respondents Only) ####
{
  # GPs that responded to the GP survey.
  gp.resp <- gp1519[which(gp1519$gp_respondent == 1), ]

  # GPs that responded as LPs: keep columns 1, 5 and 15 and rename the first
  # two so the rows can be stacked under the LP respondents.
  gplp <- gp1519[which(gp1519$lp_respondent == 1), ][, c(1, 5, 15)]
  names(gplp)[1:2] <- c("lp_fullname", "Government")

  # LP respondents (columns 1, 5 and 11) followed by the GP-as-LP rows.
  lp.resp <- lp1519[which(lp1519$lp_respondent == 1), ][, c(1, 5, 11)]
  lp.resp <- rbind(lp.resp, gplp)
}

{
  # Percentage shares for survey respondents only.

  # GP respondents: headquarters region.
  gp.resp.hq <- gp.resp %>%
    group_by(coastal) %>%
    summarise(share = n() / nrow(gp.resp) * 100)

  # GP respondents: investment region.
  # FIX: the earlier version filtered on all GPs in gp1519, which reproduced
  # the full-sample distribution; the rows are now restricted to respondents.
  gp.resp.region <- gp.region[which(gp.region$gp_fullname %in% gp.resp$gp_fullname &
                                      !is.na(gp.region$coastal)), ]
  gp.resp.invregion <- gp.resp.region %>%
    group_by(coastal) %>%
    summarise(share = n() / nrow(gp.resp.region) * 100)

  # GP respondents: investment industry.
  gp.resp.industry <- gp.industry[which(gp.industry$gp_respondent == 1), ]
  gp.resp.ind <- gp.resp.industry %>%
    group_by(industry_group) %>%
    summarise(share = n() / nrow(gp.resp.industry) * 100)

  # LP respondents: headquarters region.
  lp.resp.hq <- lp.resp %>%
    group_by(coastal) %>%
    summarise(share = n() / nrow(lp.resp) * 100)

  # LP respondents: investment region. Respondents with no respondent rows in
  # the LP region table are looked up in the GP region table instead.
  lp.region.resp.names <- lp.region$lp_fullname[which(lp.region$lp_respondent == 1)]
  gplp.region.names <- lp.resp$lp_fullname[which(!lp.resp$lp_fullname %in% lp.region.resp.names)]
  gplp.region <- gp.region[which(gp.region$gp_fullname %in% gplp.region.names), ]

  # Drop column 3 and rename columns 1 and 4 so the GP rows can be stacked
  # under the LP rows.
  # NOTE(review): positional -- confirm the column layouts of both tables match.
  gplp.region <- gplp.region[, -3]
  colnames(gplp.region)[c(1, 4)] <- c("lp_fullname", "Government")

  lp.resp.region <- rbind(lp.region[which(lp.region$lp_respondent == 1), ], gplp.region)
  lp.resp.region <- lp.resp.region[which(!is.na(lp.resp.region$coastal)), ]
  lp.resp.invregion <- lp.resp.region %>%
    group_by(coastal) %>%
    summarise(share = n() / nrow(lp.resp.region) * 100)

  # LP respondents: investment industry, built the same way.
  lp.industry.resp.names <- lp.industry$lp_fullname[which(lp.industry$lp_respondent == 1)]
  gplp.industry.names <- lp.resp$lp_fullname[which(!lp.resp$lp_fullname %in% lp.industry.resp.names)]
  gplp.industry <- gp.industry[which(gp.industry$gp_fullname %in% gplp.industry.names), ]

  gplp.industry <- gplp.industry[, -3]
  colnames(gplp.industry)[c(1, 4)] <- c("lp_fullname", "Government")

  lp.resp.industry <- rbind(lp.industry[which(lp.industry$lp_respondent == 1), ], gplp.industry)
  # Rows with a missing industry group stay in the denominator, as before.
  lp.resp.ind <- lp.resp.industry %>%
    group_by(industry_group) %>%
    summarise(share = n() / nrow(lp.resp.industry) * 100)
}


{
  # Figure A1: six horizontal bar charts of percentage shares for survey
  # respondents (headquarters, investment region, investment industry; LP
  # panels on the top row, GP panels on the bottom row), written to
  # 'FigureA1.pdf'.

  # Builds one panel. All six panels share the same layers and differ only in
  # the data, the aesthetic mapping, the extent of the gray reference lines,
  # the axis label, the title and the tick-label styling.
  #   shares      summary table holding a `share` column (percent)
  #   mapping     aes() mapping for the bars
  #   grid_max    last reference line; lines are drawn every 5 from 0
  #   axis_label  label of the categorical axis
  #   panel_title plot title
  #   tick_text   element_text() applied to axis.text.y
  figA1_panel <- function(shares, mapping, grid_max, axis_label,
                          panel_title, tick_text) {
    ggplot(shares, mapping) +
      geom_hline(yintercept = seq(0, grid_max, 5), color = 'gray') +
      geom_bar(color = "black", width = .7, position = 'stack') +
      coord_flip() +
      theme_classic() +
      ylab('Share (%)') +
      xlab(axis_label) +
      # No fill aesthetic is mapped in these panels, so this scale has no
      # visible effect; it is kept so the plot objects match the originals.
      scale_fill_brewer(palette = "Set3") +
      labs(title = panel_title) +
      theme(axis.text.y = tick_text)
  }

  # Tick-label styling: region panels vs. (smaller text) industry panels.
  figA1_region_ticks <- element_text(angle = 20, hjust = 1, vjust = 0)
  figA1_industry_ticks <- element_text(angle = 20, hjust = 1, vjust = 1,
                                       size = rel(0.8))

  # GP respondents
  pa11 <- figA1_panel(gp.resp.hq, aes(coastal, weight = share), 30,
                      'Region Group', "GP Resp.: Headquarters",
                      figA1_region_ticks)
  pa12 <- figA1_panel(gp.resp.invregion, aes(coastal, weight = share), 30,
                      'Region Group', "GP Resp.: Investment Region",
                      figA1_region_ticks)
  pa13 <- figA1_panel(gp.resp.ind, aes(industry_group, weight = share), 30,
                      'Industry Group', "GP Resp.: Investment Industry",
                      figA1_industry_ticks)

  # LP respondents
  pa14 <- figA1_panel(lp.resp.hq, aes(coastal, weight = share), 25,
                      'Region Group', "LP Resp.: Headquarters",
                      figA1_region_ticks)
  pa15 <- figA1_panel(lp.resp.invregion, aes(coastal, weight = share), 40,
                      'Region Group', "LP Resp.: Investment Region",
                      figA1_region_ticks)
  pa16 <- figA1_panel(lp.resp.ind, aes(industry_group, weight = share), 30,
                      'Industry Group', "LP Resp.: Investment Industry",
                      figA1_industry_ticks)

  # 2 x 3 grid, auto-labelled A-F: LP panels first, then GP panels.
  phighA1 <- plot_grid(pa14, pa15, pa16, pa11, pa12, pa13,
                       labels = 'AUTO',
                       nrow = 2)

  ggsave2(phighA1, filename = 'FigureA1.pdf',
          path = paste0(DIR, "appendix figure/"),
          width = 14,
          height = 7)

  # Drop the block-local helpers so the workspace holds the same objects as
  # before this refactor.
  rm(figA1_panel, figA1_region_ticks, figA1_industry_ticks)
}

## Figure A2. Distribution of Headquarters Location, Investment Region, and Investment Industry (Respondents Only; by Government Ownership) ####


{
  # Figure A2 data preparation: for survey respondents only, compute the
  # percentage share of each region / industry category WITHIN each
  # `Government` group, separately for GPs and LPs. The resulting
  # `*.bygov` tables are the inputs of the Figure A2 plots.

  # Store the Government flag as character in all six tables.
  gp1519$Government <- as.character(gp1519$Government)
  lp1519$Government <- as.character(lp1519$Government)
  gp.industry$Government <- as.character(gp.industry$Government)
  lp.industry$Government <- as.character(lp.industry$Government)
  gp.region$Government <- as.character(gp.region$Government)
  lp.region$Government <- as.character(lp.region$Government)
  # GP respondents.
  gp.resp <- gp1519[which(gp1519$gp_respondent==1),]
  # Rows of the GP table flagged as LP respondents. Columns are picked by
  # position (1, 5, 15) and the first two are renamed so the rows can be
  # stacked under the LP respondents (columns 1, 5, 11 of lp1519).
  # NOTE(review): the positional picks assume a fixed column order in the
  # .dta files -- presumably name, government flag, coastal; verify.
  gplp <- gp1519[which(gp1519$lp_respondent==1),c(1,5,15)]
  colnames(gplp)[1:2] <- c("lp_fullname","Government")
  lp.resp <- lp1519[which(lp1519$lp_respondent==1),c(1,5,11)]
  lp.resp <- rbind(lp.resp,gplp)
  
  # Attach the per-Government respondent count `n` to every row. No `by` is
  # given, so left_join() matches on the columns the two tables share.
  gp.resp <- gp.resp %>% left_join(gp.resp %>% group_by(Government) %>% summarise(n=n()))
  # share = rows in the (Government, coastal) cell / Government total * 100;
  # n() is the cell size, n[1] is the joined Government total.
  gp.resp.hq.bygov <- gp.resp%>%group_by(Government,coastal)%>% summarise(share=n()/n[1]*100)
  #gp.resp.hq.bygov[12,1]<-'1'
  #gp.resp.hq.bygov[12,2]<-"Foreign Countries"
  #gp.resp.hq.bygov[12,3]<-0
  
  # GP respondents' investment-region rows (names present in gp1519, coastal
  # not missing), again with the per-Government row count attached as `n`.
  gp.region.resp <- gp.region[which(gp.region$gp_fullname%in%gp1519$gp_fullname&
                                      !is.na(gp.region$coastal)&
                                      gp.region$gp_respondent==1),] %>% left_join(
                                        gp.region[which(gp.region$gp_fullname%in%gp1519$gp_fullname&
                                                          !is.na(gp.region$coastal)&
                                                          gp.region$gp_respondent==1),] %>% group_by(Government) %>% summarise(n=n())
                                      )
  
  gp.resp.invregion.bygov <- gp.region.resp %>%
    group_by(Government,coastal) %>%
    summarise(share = n()/n[1]*100)
  
  # Same construction for GP respondents' investment industries.
  gp.industry.resp <- gp.industry[which(gp.industry$gp_respondent==1),] %>% 
    left_join(
      gp.industry[which(gp.industry$gp_respondent==1),] %>% group_by(Government) %>%
        summarise(n=n())
    )
  
  gp.resp.ind.bygov <- gp.industry.resp %>%
    group_by(Government, industry_group) %>%
    summarise(share = n()/n[1]*100)
  
  # LP respondents (including the GP-table rows stacked above): headquarters.
  lp.resp <- lp.resp %>% left_join(lp.resp %>% group_by(Government) %>% summarise(n = n()))
  
  lp.resp.hq.bygov<- lp.resp%>%group_by(Government,coastal)%>% summarise(share=n()/n[1]*100)
  # Manually write a zero-share ("No", "Inland Region") entry into row 10.
  # NOTE(review): the hard-coded index presumably appends a missing
  # combination after a 9-row summary; with a different row count it would
  # overwrite an existing row or fail -- verify against the data.
  lp.resp.hq.bygov[10,1]<-'No'
  lp.resp.hq.bygov[10,2]<-"Inland Region"
  lp.resp.hq.bygov[10,3]<-0
  #lp.resp.hq.bygov[12,1]<-'1'
  #lp.resp.hq.bygov[12,2]<-"Foreign Countries"
  #lp.resp.hq.bygov[12,3]<-0
  
  # GP-region rows for LP respondents whose names are not already among the
  # respondent rows of lp.region.
  gplp.region <- gp.region[which(gp.region$gp_fullname%in%lp.resp$lp_fullname[which(!lp.resp$lp_fullname%in%lp.region$lp_fullname[which(lp.region$lp_respondent==1)])]),]
  
  # Drop the third column and rename columns 1 and 4 (both by position) so
  # the rows can be rbind-ed under lp.region.
  # NOTE(review): presumably this aligns the GP layout with lp.region's
  # columns -- confirm the column order.
  gplp.region <- gplp.region[,-3]
  
  colnames(gplp.region)[c(1,4)] <- c("lp_fullname","Government")
  
  lp.resp.invregion <- rbind(lp.region[which(lp.region$lp_respondent==1),],gplp.region )
  
  # Keep rows with a known region and attach the per-Government count `n`.
  lp.resp.invregion <- lp.resp.invregion[which(!is.na(lp.resp.invregion$coastal)),] %>% left_join(
    lp.resp.invregion[which(!is.na(lp.resp.invregion$coastal)),] %>% group_by(Government) %>% summarise(n=n())
  )
  
  lp.resp.invregion.bygov<- lp.resp.invregion %>%
    group_by(Government,coastal) %>%
    summarise(share = n()/n[1]*100)
  
  # Same steps for LP respondents' investment industries.
  
  gplp.industry <- gp.industry[which(gp.industry$gp_fullname%in%lp.resp$lp_fullname[which(!lp.resp$lp_fullname%in%lp.industry$lp_fullname[which(lp.industry$lp_respondent==1)])]),]
  
  gplp.industry <- gplp.industry[,-3]
  
  colnames(gplp.industry)[c(1,4)] <- c("lp_fullname","Government")
  
  lp.resp.ind <- rbind(lp.industry[which(lp.industry$lp_respondent==1),],gplp.industry)
  
  lp.resp.ind <- lp.resp.ind %>% left_join(lp.resp.ind %>% group_by(Government) %>% summarise(n=n()))
  
  lp.resp.ind.bygov <- lp.resp.ind %>%
    group_by(Government,industry_group) %>%
    summarise(share = n()/n[1]*100)
}

{
  # Figure A2: the same six respondent panels as Figure A1, but with bars
  # split (dodged) by the `Government` flag. LP panels on the top row, GP
  # panels on the bottom row; written to 'FigureA2.pdf'.

  # Builds one panel. The six panels share every layer and differ only in
  # the data, the aesthetic mapping, the extent of the gray reference lines,
  # the axis label, the title and the tick-label styling.
  #   shares      summary table holding `share` (percent) and `Government`
  #   mapping     aes() mapping for the bars (includes fill = Government)
  #   grid_max    last reference line; lines are drawn every 5 from 0
  #   axis_label  label of the categorical axis
  #   panel_title plot title
  #   tick_text   element_text() applied to axis.text.y
  figA2_panel <- function(shares, mapping, grid_max, axis_label,
                          panel_title, tick_text) {
    ggplot(shares, mapping) +
      geom_hline(yintercept = seq(0, grid_max, 5), color = 'gray') +
      geom_bar(color = "black", width = .7, position = 'dodge') +
      coord_flip() +
      theme_classic() +
      ylab('Share (%)') +
      xlab(axis_label) +
      scale_fill_brewer(palette = "Set1") +
      labs(title = panel_title) +
      theme(axis.text.y = tick_text)
  }

  # Tick-label styling: region panels vs. (smaller text) industry panels.
  figA2_region_ticks <- element_text(angle = 20, hjust = 1, vjust = 0)
  figA2_industry_ticks <- element_text(angle = 20, hjust = 1, vjust = 1,
                                       size = rel(0.8))

  # GP respondents
  pa21 <- figA2_panel(gp.resp.hq.bygov,
                      aes(coastal, weight = share, fill = Government), 30,
                      'Region Group', "GP Resp.: Headquarters",
                      figA2_region_ticks)
  pa22 <- figA2_panel(gp.resp.invregion.bygov,
                      aes(coastal, weight = share, fill = Government), 35,
                      'Region Group', "GP Resp.: Investment Region",
                      figA2_region_ticks)
  pa23 <- figA2_panel(gp.resp.ind.bygov,
                      aes(industry_group, weight = share, fill = Government), 30,
                      'Industry Group', "GP Resp.: Investment Industry",
                      figA2_industry_ticks)

  # LP respondents
  pa24 <- figA2_panel(lp.resp.hq.bygov,
                      aes(coastal, weight = share, fill = Government), 30,
                      'Region Group', "LP Resp.: Headquarters",
                      figA2_region_ticks)
  pa25 <- figA2_panel(lp.resp.invregion.bygov,
                      aes(coastal, weight = share, fill = Government), 40,
                      'Region Group', "LP Resp.: Investment Region",
                      figA2_region_ticks)
  pa26 <- figA2_panel(lp.resp.ind.bygov,
                      aes(industry_group, weight = share, fill = Government), 30,
                      'Industry Group', "LP Resp.: Investment Industry",
                      figA2_industry_ticks)

  # 2 x 3 grid, auto-labelled A-F: LP panels first, then GP panels.
  phighA2 <- plot_grid(pa24, pa25, pa26, pa21, pa22, pa23,
                       labels = 'AUTO',
                       nrow = 2)

  ggsave2(phighA2, filename = 'FigureA2.pdf',
          path = paste0(DIR, "appendix figure/"),
          width = 14,
          height = 7)

  # Drop the block-local helpers so the workspace holds the same objects as
  # before this refactor.
  rm(figA2_panel, figA2_region_ticks, figA2_industry_ticks)
}
## Figure A3. 2019 Experimental Survey: Recruitment Email: no code ####
## Figure A4. Job Positions of Targeted Respondents ####
library(tidyverse)
library(haven)
library(cowplot)
library(RColorBrewer)


# Figure A4: counts of respondent-list entries by job position, shown for
# all entries and separately for the GP and LP subsets.

# Read the respondent list and keep the first row for each gp_fullname.
data1 <- read_dta("Data\\survey data used for analysis\\1000respondent_list.dta")
data <- data1[!duplicated(data1$gp_fullname), ]

# Normalise the position labels. The innermost ifelse() previously had no
# `no` argument, which makes ifelse() stop with 'argument "no" is missing'
# as soon as any row is not "Manager/Executive". Falling through to the
# original value keeps every other label (including 'Director', which is one
# of the factor levels used below) unchanged.
data <- data %>% mutate(
  position = ifelse(position == "Partner", "Partner",
                    ifelse(position == "Other", "Other",
                           ifelse(position == "Manager/Executive",
                                  "Manager/Executive",
                                  position)))
)

# Counts per position for: GP rows stacked with LP rows ("All"), GP rows
# only, and LP rows only. A row flagged as both gp and lp is counted twice
# in "All" because the two subsets are stacked.
dt.figure <- rbind(
  rbind(data[data$gp==1,], data[data$lp==1,]) %>%
    group_by(position) %>% summarise(n = n()) %>% mutate(Group = "All"),
  data[data$gp==1,] %>%
    group_by(position) %>% summarise(n = n()) %>% mutate(Group = "GP"),
  data[data$lp==1,] %>%
    group_by(position) %>% summarise(n = n()) %>% mutate(Group = "LP")
)

# Fix the display order of the positions on the x axis.
dt.figure$position <-
  factor(dt.figure$position,
         levels = c('Partner','Director','Manager/Executive','Other'),
         ordered = TRUE)

pp <- ggplot(dt.figure, aes(position, weight = n, fill = Group)) +
  geom_hline(yintercept = seq(0, 400, 50), color = 'gray') +
  geom_bar(color = "black", width = .7, position = 'dodge') +
  #coord_flip()+
  theme_classic() +
  scale_y_continuous(breaks = seq(0, 400, 50)) +
  ylab(NULL) +
  xlab(NULL) +
  scale_fill_brewer(palette = "Set1") +
  #labs(title = "Inactive LP: Investment Region")+
  theme(axis.text.y = element_text(angle = 20, hjust = 1, vjust = 0))
pp
ggsave2(pp, filename = 'FigureA4.pdf',
        path = paste0(DIR, "appendix figure/"),
        width = 11,
        height = 5.5)

## Figure A5. 2021 Qualitative Survey: no code ####


# Tables (using R) ####


# Tables ####

## table 1: Summary Statistics  ####
{
  # Packages
  # Please install any of these that you do not already have.
  Sys.setlocale(, "Chinese")
  library(readxl)
  library(tidyverse)
  library(showtext)
  library(stringr)
  library(rlist)
  library(haven)
  library(knitr)
  library(kableExtra)
  library(lubridate)
  library(stargazer)
  library(haven)
  library(DescTools)
  library(cowplot)
  library(RColorBrewer)
  library(foreign)
}

{
  # Read the six Zero2IPO Stata files used for Table 1. Each element name is
  # the variable the file is loaded into (gp1519, lp1519, gp.industry, ...).
  invisible(list2env(
    lapply(
      c(gp1519 = "Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta",
        lp1519 = "Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta",
        gp.industry = "Data\\Zero2IPO data used for analysis\\gpindustry.dta",
        lp.industry = "Data\\Zero2IPO data used for analysis\\lpindustry.dta",
        gp.region = "Data\\Zero2IPO data used for analysis\\gpregion.dta",
        lp.region = "Data\\Zero2IPO data used for analysis\\lpregion.dta"),
      read_dta
    ),
    envir = environment()
  ))
}
{
  # Table 1: summary statistics for GPs and LPs, in six columns each
  # (Active: All / Gov / NonGov, then Respondent: All / Gov / NonGov).
  # Produces the data frames `mean.gp.summary` and `mean.lp.summary`.

  # using gov >= 0 as gov
  gp.resp <- gp1519[gp1519$gp_respondent==1,]
  gp.gov.resp <- gp.resp[gp.resp$govgp==1,]
  gp.nongov.resp <- gp.resp[gp.resp$govgp==0,]
  gp.gov.active <- gp1519[gp1519$govgp==1,]
  gp.nongov.active <- gp1519[gp1519$govgp==0,]

  # Mean that ignores missing values.
  mean.noNA <- function(x){
    mean(x, na.rm = TRUE)
  }

  # Winsorize() with probs = c(0, 0.95), exactly as used for every money /
  # IRR variable in this table.
  winsorize_top <- function(x) {
    Winsorize(x, probs = c(0, 0.95), na.rm = TRUE)
  }

  # Mean firm age in 2019 for a vector of founding years.
  mean_firm_age <- function(founding_year) {
    mean(2019 - founding_year, na.rm = TRUE)
  }

  # One GP column of the table: means of gov share, winsorized AUM,
  # winsorized IRR, funds, investments, exits, plus firm age computed from
  # `founding_year` (passed separately because the gov / non-gov columns take
  # it from a different subset than `gp`).
  gp_column_means <- function(gp, founding_year) {
    apply(cbind(gp$gp_total_gov_share,
                winsorize_top(gp$AUMrmb),
                winsorize_top(gp$gp_irr),
                gp$Num_mgt_fund,
                gp$Number_investment,
                gp$Number_exits,
                mean_firm_age(founding_year)), 2, mean.noNA)
  }

  # One LP column of the table: means of gov share, winsorized capital
  # invested, number of fund links, plus firm age from `founding_year`.
  lp_column_means <- function(lp, founding_year) {
    apply(cbind(lp$lp_total_gov_share,
                winsorize_top(lp$Total_invest_in_fund),
                lp$Num_of_fund_link,
                mean_firm_age(founding_year)), 2, mean.noNA)
  }

  # NOTE(review): in the Gov / NonGov columns firm age is taken from rows
  # with a non-missing / missing total gov share, not from the govgp / govlp
  # flag used for the other rows. Kept as in the original; confirm that this
  # is intended.
  mean.gp.all.resp <- gp_column_means(gp.resp, gp.resp$founding_year)
  mean.gp.gov.resp <- gp_column_means(
    gp.gov.resp,
    gp.resp$founding_year[!is.na(gp.resp$gp_total_gov_share)])
  mean.gp.nongov.resp <- gp_column_means(
    gp.nongov.resp,
    gp.resp$founding_year[is.na(gp.resp$gp_total_gov_share)])
  mean.gp.all.active <- gp_column_means(gp1519, gp1519$founding_year)
  mean.gp.gov.active <- gp_column_means(
    gp.gov.active,
    gp1519$founding_year[!is.na(gp1519$gp_total_gov_share)])
  mean.gp.nongov.active <- gp_column_means(
    gp.nongov.active,
    gp1519$founding_year[is.na(gp1519$gp_total_gov_share)])

  mean.gp.summary <- cbind(mean.gp.all.active,mean.gp.gov.active,mean.gp.nongov.active,
                           mean.gp.all.resp,mean.gp.gov.resp,mean.gp.nongov.resp)
  mean.gp.summary <- cbind(c('Share Government-Owned (%)','AUM($ millions)','IRR (% median)','Funds','Investments','Exits','Firm Age'),
                           as.data.frame(mean.gp.summary))
  colnames(mean.gp.summary) <- c('',rep(c('All','Gov','NonGov'),2))

  # LP side. `gplp` holds the GP-table rows flagged as LP respondents; they
  # enter the LP respondent firm age and government share below.
  lp.resp <- lp1519[lp1519$lp_respondent==1,]
  gplp <- gp1519[gp1519$lp_respondent==1,]
  lp.gov.resp <- lp.resp[lp.resp$govlp==1,]
  lp.nongov.resp <- lp.resp[lp.resp$govlp==0,]
  gplp.gov <- gplp[gplp$govgp==1,]
  gplp.nongov <- gplp[gplp$govgp==0,]
  lp.gov.active <- lp1519[lp1519$govlp==1,]
  lp.nongov.active <- lp1519[lp1519$govlp==0,]

  mean.lp.all.resp <- lp_column_means(
    lp.resp,
    c(lp.resp$founding_year,gplp$founding_year))
  mean.lp.gov.resp <- lp_column_means(
    lp.gov.resp,
    c(lp.resp$founding_year[!is.na(lp.resp$lp_total_gov_share)],gplp.gov$founding_year))
  mean.lp.nongov.resp <- lp_column_means(
    lp.nongov.resp,
    c(lp.resp$founding_year[is.na(lp.resp$lp_total_gov_share)],gplp.nongov$founding_year))
  mean.lp.all.active <- lp_column_means(lp1519, lp1519$founding_year)
  mean.lp.gov.active <- lp_column_means(
    lp.gov.active,
    lp1519$founding_year[!is.na(lp1519$lp_total_gov_share)])
  mean.lp.nongov.active <- lp_column_means(
    lp.nongov.active,
    lp1519$founding_year[is.na(lp1519$lp_total_gov_share)])

  mean.lp.summary <- cbind(mean.lp.all.active,mean.lp.gov.active,mean.lp.nongov.active,
                           mean.lp.all.resp,mean.lp.gov.resp,mean.lp.nongov.resp)

  mean.lp.summary <- cbind(c('Share Government-Owned (%)','Capital Invested($ millions)','Fund Invested','Firm Age'),as.data.frame(mean.lp.summary))
  colnames(mean.lp.summary) <- c('',rep(c('All','Gov','NonGov'),2))

  # Row 1 ("Share Government-Owned") is overwritten: 0 for the NonGov
  # columns, 100 for the Gov columns, and the percentage of rows flagged as
  # government for the two "All" columns.
  mean.gp.summary[1,c(4,7)] <- 0
  mean.lp.summary[1,c(4,7)] <- 0
  mean.gp.summary[1,c(3,6)] <- 100
  mean.lp.summary[1,c(3,6)] <- 100

  mean.gp.summary[1,2] <- length(which(gp1519$govgp==1))/nrow(gp1519)*100
  mean.gp.summary[1,5] <- length(which(gp.resp$govgp==1))/nrow(gp.resp)*100

  mean.lp.summary[1,2] <- length(which(lp1519$govlp==1))/nrow(lp1519)*100
  mean.lp.summary[1,5] <- (length(which(lp.resp$govlp==1))+length(which(gplp$govgp==1)))/(nrow(lp.resp)+nrow(gplp))*100

  # Row 3 of the GP table is labelled as a median, so the means computed
  # above are replaced by medians of the winsorized IRR.
  winsorized_median <- function(x) {
    median(winsorize_top(x), na.rm = TRUE)
  }
  mean.gp.summary[3,2] <- winsorized_median(gp1519$gp_irr)
  mean.gp.summary[3,3] <- winsorized_median(gp1519$gp_irr[which(gp1519$govgp==1)])
  mean.gp.summary[3,4] <- winsorized_median(gp1519$gp_irr[which(gp1519$govgp==0)])
  mean.gp.summary[3,5] <- winsorized_median(gp.resp$gp_irr)
  mean.gp.summary[3,6] <- winsorized_median(gp.resp$gp_irr[which(gp.resp$govgp==1)])
  mean.gp.summary[3,7] <- winsorized_median(gp.resp$gp_irr[which(gp.resp$govgp==0)])
}



{# output table
  # Keep only the two Table 1 data frames in the workspace.
  rm(list = setdiff(ls(), c("mean.gp.summary","mean.lp.summary")))

  # NOTE(review): no `format` is passed to kbl(), so the output format
  # depends on session options; confirm it is LaTeX for these .tex files.

  # Panel A: LPs -- three "Active" and three "Respondent" columns.
  table1_lp <- kbl(mean.lp.summary, caption = 'LPs', digits = 2, booktabs = TRUE)
  table1_lp <- add_header_above(table1_lp, c(" ", "Active" = 3, "Respondent" = 3))
  table1_lp <- kable_styling(table1_lp, latex_options = c("HOLD_position"))
  cat(table1_lp, file = 'Output\\main table\\Table1_PanelA.tex')

  # Panel B: GPs -- same column layout.
  table1_gp <- kbl(mean.gp.summary, caption = 'GPs', digits = 2, booktabs = TRUE)
  table1_gp <- add_header_above(table1_gp, c(" ", "Active" = 3, "Respondent" = 3))
  table1_gp <- kable_styling(table1_gp, latex_options = c("HOLD_position"))
  cat(table1_gp, file = 'Output\\main table\\Table1_PanelB.tex')
}
## Table 2: Government Ownership of Investors and Fund Managers ####
{
  # Table 2 inputs: load the ownership tables and restrict them to the
  # entities used in the analysis.

  # Start from an empty workspace BEFORE reading anything. Previously the
  # Stata files were read first and then wiped by rm(list = ls()), although
  # gp1519 and lp1519 are needed by the filters below.
  rm(list = ls())

  # Only gp1519 and lp1519 are used in this section; the industry / region
  # files that used to be read here were never referenced.
  gp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta")
  lp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta")

  # Loaded after the reads so that, as before, any object of the same name
  # stored in the .rdata file takes precedence.
  load("Data\\Zero2IPO data used for analysis\\GPLP_Ownership.rdata")

  # Row / column filters use logical keep-masks instead of x[-which(cond), ]:
  # when nothing matches, -which() is an empty index and would select zero
  # rows (or columns) instead of keeping everything.

  # Drops the rows where `cond` is TRUE; rows where it is FALSE or NA are
  # kept, which is what -which(cond) does when at least one row matches.
  drop_rows_where <- function(df, cond) {
    df[!(cond %in% TRUE), , drop = FALSE]
  }
  # Removes the column called `name` if it is present.
  drop_column <- function(df, name) {
    df[, colnames(df) != name, drop = FALSE]
  }

  # Keep only entities present in the active GP / LP tables.
  gpactiveown <- drop_rows_where(
    gpactiveown,
    !gpactiveown$gp_fullname %in% gp1519$gp_fullname)
  lpactiveown.new <- drop_rows_where(
    lpactiveown.new,
    !lpactiveown.new$lp_fullname %in% lp1519$lp_fullname)

  gpactiveown <- drop_column(gpactiveown, "hq_province_group")
  lpactiveown.new <- drop_column(lpactiveown.new, "hq_province_group")
  lprespown.new <- drop_column(lprespown.new, "hq_province_group")

  # Keep respondent LPs: names flagged lp_respondent in either the LP table
  # or the GP table.
  lprespown.new <- drop_rows_where(
    lprespown.new,
    !lprespown.new$lp_fullname %in% lp1519$lp_fullname[which(lp1519$lp_respondent==1)] &
      !lprespown.new$lp_fullname %in% gp1519$gp_fullname[which(gp1519$lp_respondent==1)])

  # t_g_share_s = t_g_share - t_g_share_a wherever t_g_share is positive.
  # NOTE(review): for lpactiveown.new the clamp to 0 runs after this
  # recomputation, for lprespown.new it runs before it. The original order is
  # kept; t_g_share_s is not used by the Table 2 statistics. Confirm whether
  # the respondent table should also be clamped afterwards.
  lprespown.new$t_g_share_s[lprespown.new$t_g_share_s<=0] <- 0
  lpactiveown.new$t_g_share_s[lpactiveown.new$t_g_share>0] <-
    lpactiveown.new$t_g_share[lpactiveown.new$t_g_share>0]-
    lpactiveown.new$t_g_share_a[lpactiveown.new$t_g_share>0]
  lpactiveown.new$t_g_share_s[lpactiveown.new$t_g_share_s<=0] <- 0
  lprespown.new$t_g_share_s[lprespown.new$t_g_share>0] <-
    lprespown.new$t_g_share[lprespown.new$t_g_share>0]-
    lprespown.new$t_g_share_a[lprespown.new$t_g_share>0]
  gpactiveown$t_g_share_s <- gpactiveown$t_g_share - gpactiveown$t_g_share_a

  # Drop entities flagged as government whose total government share is 0.
  gpactiveown <- drop_rows_where(
    gpactiveown,
    gpactiveown$gov==1 & gpactiveown$t_g_share==0)
  lpactiveown.new <- drop_rows_where(
    lpactiveown.new,
    lpactiveown.new$gov==1 & lpactiveown.new$t_g_share==0)
  lprespown.new <- drop_rows_where(
    lprespown.new,
    lprespown.new$gov==1 & lprespown.new$t_g_share==0)
}

{
  # Table 2: distribution of government ownership shares. Every row holds a
  # label followed by eight statistics (min, p10, p25, median, mean, p75,
  # p90, max) for the active sample and the same eight for respondents.

  # The eight statistics of one vector, rounded to two digits.
  share_stats <- function(x) {
    round(c(quantile(x, probs = c(0, .1, .25, .5)),
            mean(x),
            quantile(x, probs = c(.75, .9, 1))), digits = 2)
  }
  # One table row: label, active-sample statistics, respondent statistics.
  share_row <- function(row_label, active, resp) {
    c(row_label, share_stats(active), share_stats(resp))
  }
  # Strictly positive entries of a share vector.
  positive_only <- function(x) {
    x[x > 0]
  }

  # LPs: the active sample is lpactiveown.new, respondents are lprespown.new.
  lp.summary.table2 <- rbind(
    c("", rep(c("Min","p10","p25","Median","Mean","p75","p90","Max"), times = 2)),
    share_row("lp total gov share",
              lpactiveown.new$t_g_share,
              lprespown.new$t_g_share),
    share_row("lp gov share within gov",
              lpactiveown.new$t_g_share[lpactiveown.new$gov==1],
              lprespown.new$t_g_share[lprespown.new$gov==1]),
    share_row("lp central gov share",
              positive_only(lpactiveown.new$c_g_share),
              positive_only(lprespown.new$c_g_share)),
    share_row("lp provincial gov share",
              positive_only(lpactiveown.new$p_g_share),
              positive_only(lprespown.new$p_g_share)),
    share_row("lp local gov share",
              positive_only(lpactiveown.new$l_g_share),
              positive_only(lprespown.new$l_g_share))
  )

  # GPs: respondents are the rows of gpactiveown with gp_respondent == 1.
  gp.summary.table2 <- rbind(
    c("", rep(c("Min","p10","p25","Median","Mean","p75","p90","Max"), times = 2)),
    share_row("gp total gov share",
              gpactiveown$t_g_share,
              gpactiveown$t_g_share[gpactiveown$gp_respondent==1]),
    share_row("gp gov share with gov",
              gpactiveown$t_g_share[gpactiveown$gov==1],
              gpactiveown$t_g_share[gpactiveown$gp_respondent==1&gpactiveown$gov==1]),
    share_row("gp central gov share",
              positive_only(gpactiveown$c_g_share),
              gpactiveown$c_g_share[gpactiveown$gp_respondent==1&gpactiveown$c_g_share>0]),
    share_row("gp provincial gov share",
              positive_only(gpactiveown$p_g_share),
              gpactiveown$p_g_share[gpactiveown$gp_respondent==1&gpactiveown$p_g_share>0]),
    share_row("gp local gov share",
              positive_only(gpactiveown$l_g_share),
              gpactiveown$l_g_share[gpactiveown$gp_respondent==1&gpactiveown$l_g_share>0])
  )

  # Remove the block-local helpers again; only the two tables are new.
  rm(share_stats, share_row, positive_only)
}
{# output table
  # Keep only the two Table 2 matrices in the workspace.
  rm(list = setdiff(ls(), c("gp.summary.table2","lp.summary.table2")))

  # NOTE(review): no `format` is passed to kbl(), so the output format
  # depends on session options; confirm it is LaTeX for these .tex files.

  # Panel A: LPs -- eight "Active" and eight "Respondent" statistics.
  table2_lp <- kbl(lp.summary.table2, caption = 'LPs', digits = 2, booktabs = TRUE)
  table2_lp <- add_header_above(table2_lp, c(" ", "Active" = 8, "Respondent" = 8))
  table2_lp <- kable_styling(table2_lp, latex_options = c("HOLD_position"))
  cat(table2_lp, file = 'Output\\main table\\Table2_PanelA.tex')

  # Panel B: GPs -- same column layout.
  table2_gp <- kbl(gp.summary.table2, caption = 'GPs', digits = 2, booktabs = TRUE)
  table2_gp <- add_header_above(table2_gp, c(" ", "Active" = 8, "Respondent" = 8))
  table2_gp <- kable_styling(table2_gp, latex_options = c("HOLD_position"))
  cat(table2_gp, file = 'Output\\main table\\Table2_PanelB.tex')
}
## Table A1. Comparing Active and Inactive Entities in the ZeroIPO Database ####

{
  # Read the six Zero2IPO Stata files used for Table A1. Each element name
  # is the variable the file is loaded into.
  invisible(list2env(
    lapply(
      c(gp1519 = "Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta",
        lp1519 = "Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta",
        gp.industry = "Data\\Zero2IPO data used for analysis\\gpindustry.dta",
        lp.industry = "Data\\Zero2IPO data used for analysis\\lpindustry.dta",
        gp.region = "Data\\Zero2IPO data used for analysis\\gpregion.dta",
        lp.region = "Data\\Zero2IPO data used for analysis\\lpregion.dta"),
      read_dta
    ),
    envir = environment()
  ))
}
# Arithmetic mean of `x` with missing values removed first.
mean.noNA <- function(x) mean(x, na.rm = TRUE)

  # Table A1: compare active entities (gp1519 / lp1519) with the inactive
  # ones read below. Produces `mean.gp.summaryA1` and `mean.lp.summaryA1`,
  # each with a label column, an Active column and an Inactive column.
  gp1 <- read_dta("Data\\Zero2IPO data used for analysis\\Inactive_GP.dta")
  lp1 <- read_dta("Data\\Zero2IPO data used for analysis\\Inactive_LP.dta")
  # Divide the RMB amounts by 7.
  # NOTE(review): presumably an RMB-to-USD conversion, since the row labels
  # are in "$ millions" -- confirm the rate.
  gp1$AUMrmb <- gp1$AUMrmb/7
  lp1$InvestmentAmountRMB <- lp1$InvestmentAmountRMB/7

  # Inactive GPs. The leading 0 is a placeholder for the government share,
  # which is replaced by NA further down.
  mean.gp.all.inactive <- apply(cbind(0,Winsorize(gp1$AUMrmb, probs = c(0,0.95), na.rm = TRUE),
                         Winsorize(gp1$gp_irr, probs = c(0,0.95), na.rm = TRUE),
                         gp1$Number_mgt_fund,
                         gp1$Number_investment,gp1$Number_exits,mean(2019-gp1$founding_year,na.rm = TRUE)),2,mean.noNA)
  
  # Active GPs.
  mean.gp.all.active <- apply(cbind(gp1519$gp_total_gov_share,
                                   Winsorize(gp1519$AUMrmb, probs = c(0,0.95), na.rm = TRUE) ,
                                   Winsorize(gp1519$gp_irr, probs = c(0,0.95), na.rm = TRUE),
                                   gp1519$Num_mgt_fund,
                                   gp1519$Number_investment,gp1519$Number_exits,mean(2019-gp1519$founding_year,na.rm = TRUE)),2,mean.noNA)
  
  mean.gp.summaryA1 <- cbind(mean.gp.all.active,mean.gp.all.inactive)
  mean.gp.summaryA1 <- cbind(c('Share Government-Owned (%)','AUM($ millions)','IRR (% median)','Funds','Investments','Exits','Firm Age'),
                          as.data.frame(mean.gp.summaryA1))
  colnames(mean.gp.summaryA1) <- c('',rep(c('All'),2))

  # Inactive LPs (leading 0 is again the government-share placeholder).
  mean.lp.all.inactive <- apply(cbind(0,Winsorize(lp1$InvestmentAmountRMB, probs = c(0,0.95), na.rm = TRUE),
                         lp1$number_fund_invested, mean(2019-lp1$founding_year,na.rm = TRUE)),2,mean.noNA)
  
  # Active LPs.
  mean.lp.all.active <- apply(cbind(lp1519$lp_total_gov_share,
                                   Winsorize( lp1519$Total_invest_in_fund, probs = c(0,0.95), na.rm = TRUE) ,
                                   lp1519$Num_of_fund_link,mean(2019-lp1519$founding_year,na.rm = TRUE)),2,mean.noNA)
  mean.lp.summaryA1 <- cbind(mean.lp.all.active,mean.lp.all.inactive)
  mean.lp.summaryA1 <- cbind(c('Share Government-Owned (%)','Capital Invested($ millions)','Fund Invested','Firm Age'),
                            as.data.frame(mean.lp.summaryA1))
  colnames(mean.lp.summaryA1) <- c('',rep(c('All'),2))
  
  # Row 1: percentage of active entities flagged as government. The value is
  # not available for inactive entities, so it is set to a numeric NA. The
  # string "NA" was used before, which turned the whole Inactive column into
  # character and stopped kbl(digits = 2) from rounding it; a numeric NA is
  # still printed as "NA" by default.
  mean.gp.summaryA1[1,2] <- length(which(gp1519$govgp==1))/nrow(gp1519)*100
  mean.gp.summaryA1[1,3] <- NA_real_
  # Row 3 of the GP table is labelled as a median: replace the means with
  # medians of the winsorized IRR.
  mean.gp.summaryA1[3,2] <- median(Winsorize(gp1519$gp_irr, probs = c(0,0.95), na.rm = TRUE), na.rm = TRUE)
  mean.gp.summaryA1[3,3] <- median(Winsorize(gp1$gp_irr, probs = c(0,0.95), na.rm = TRUE), na.rm = TRUE)
  mean.lp.summaryA1[1,2] <- length(which(lp1519$govlp==1))/nrow(lp1519)*100
  mean.lp.summaryA1[1,3] <- NA_real_
  {  
  table=rm(list = ls()[which(!ls()%in%c("mean.lp.summaryA1","mean.gp.summaryA1"))])
  # lp part
  tableA1_lp<-kbl(mean.lp.summaryA1, caption = 'LPs',digits = 2, booktabs = T ) %>%
    add_header_above(c(" ", "Active" = 1, "Inactive" = 1)) %>%
    kable_styling(latex_options = c("HOLD_position"))
  
  cat(tableA1_lp,file = 'Output\\appendix table\\TableA1_PanelA.tex')
    
  
  # gp part
  tableA1_gp<-kbl(mean.gp.summaryA1, caption = 'GPs',digits = 2, booktabs = T ) %>%
    add_header_above(c(" ", "Active" = 1, "Inactive" = 1)) %>%
    kable_styling(latex_options = c("HOLD_position"))}
  
  cat(tableA1_gp,file = 'Output\\appendix table\\TableA1_PanelB.tex')
## Table A2. Comparing Respondents and Non-Respondents ####
  
  {
    # Respondent-side inputs: active GP / LP summary files (foreign entities excluded).
    gp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta")
    lp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta")

    # Counterpart files with non-respondents added (per the file names).
    gp15191 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_adding_nonrespond.dta")
    lp15191 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_adding_nonrespond.dta")
  }
    
  {
    # Table A2: respondents vs. non-respondents, each split into
    # All / Gov / NonGov using the govgp / govlp flags in the data
    # (original note: "using gov >= 0 as gov").
    mean.noNA <- function(x) {
      mean(x, na.rm = TRUE)
    }

    # Column means for one GP sample `d`: government share, winsorized AUM and
    # IRR (95th percentile, upper tail only), fund / investment / exit counts,
    # and the mean firm age in 2019 computed from `founding` (a scalar that
    # cbind() recycles down the column).
    gp_profile <- function(d, founding) {
      apply(cbind(d$gp_total_gov_share,
                  Winsorize(d$AUMrmb, probs = c(0, 0.95), na.rm = TRUE),
                  Winsorize(d$gp_irr, probs = c(0, 0.95), na.rm = TRUE),
                  d$Num_mgt_fund,
                  d$Number_investment,
                  d$Number_exits,
                  mean(2019 - founding, na.rm = TRUE)),
            2, mean.noNA)
    }

    # All / Gov / NonGov columns for one GP sample. Firm age for the Gov and
    # NonGov columns is taken from rows with a non-missing / missing
    # gp_total_gov_share respectively (not from the govgp flag).
    gp_panel <- function(d) {
      has_share <- !is.na(d$gp_total_gov_share)
      cbind(gp_profile(d, d$founding_year),
            gp_profile(d[d$govgp == 1, ], d$founding_year[has_share]),
            gp_profile(d[d$govgp == 0, ], d$founding_year[!has_share]))
    }

    # Same idea for LPs: government share, winsorized capital invested,
    # number of fund links, mean firm age from `founding`.
    lp_profile <- function(d, founding) {
      apply(cbind(d$lp_total_gov_share,
                  Winsorize(d$Total_invest_in_fund, probs = c(0, 0.95), na.rm = TRUE),
                  d$Num_of_fund_link,
                  mean(2019 - founding, na.rm = TRUE)),
            2, mean.noNA)
    }

    # All / Gov / NonGov columns for one LP sample. Firm age pools the LP rows
    # with the GP rows that answered as LPs (`gp_d`).
    lp_panel <- function(lp_d, gp_d) {
      has_share <- !is.na(lp_d$lp_total_gov_share)
      gp_gov <- gp_d[gp_d$govgp == 1, ]
      gp_nongov <- gp_d[gp_d$govgp == 0, ]
      cbind(lp_profile(lp_d, c(lp_d$founding_year, gp_d$founding_year)),
            lp_profile(lp_d[lp_d$govlp == 1, ],
                       c(lp_d$founding_year[has_share], gp_gov$founding_year)),
            lp_profile(lp_d[lp_d$govlp == 0, ],
                       c(lp_d$founding_year[!has_share], gp_nongov$founding_year)))
    }

    # Median of the winsorized IRR, used for the "IRR (% median)" row.
    irr_median <- function(v) {
      median(Winsorize(v, probs = c(0, 0.95), na.rm = TRUE), na.rm = TRUE)
    }

    # Samples.
    # NOTE(review): the non-respondent samples are also selected with
    # *_respondent == 1, but from the "adding_nonrespond" files - confirm that
    # the flag marks non-respondents there.
    gp.resp <- gp1519[gp1519$gp_respondent == 1, ]
    gp.nonresp <- gp15191[gp15191$gp_respondent == 1, ]
    lp.resp <- lp1519[lp1519$lp_respondent == 1, ]
    lp.nonresp <- lp15191[lp15191$lp_respondent == 1, ]
    # GP records flagged lp_respondent == 1; they are counted on the LP side too.
    gplp <- gp1519[gp1519$lp_respondent == 1, ]
    gplp1 <- gp15191[gp15191$lp_respondent == 1, ]

    # GP panel: columns 2-4 = respondents, 5-7 = non-respondents.
    mean.gp.summaryA2 <- cbind(gp_panel(gp.resp), gp_panel(gp.nonresp))
    mean.gp.summaryA2 <- cbind(
      c('Share Government-Owned (%)', 'AUM($ millions)', 'IRR (% median)',
        'Funds', 'Investments', 'Exits', 'Firm Age'),
      as.data.frame(mean.gp.summaryA2))
    colnames(mean.gp.summaryA2) <- c('', rep(c('All', 'Gov', 'NonGov'), 2))

    # LP panel, same column layout.
    mean.lp.summaryA2 <- cbind(lp_panel(lp.resp, gplp), lp_panel(lp.nonresp, gplp1))
    mean.lp.summaryA2 <- cbind(
      c('Share Government-Owned (%)', 'Capital Invested($ millions)',
        'Fund Invested', 'Firm Age'),
      as.data.frame(mean.lp.summaryA2))
    colnames(mean.lp.summaryA2) <- c('', rep(c('All', 'Gov', 'NonGov'), 2))

    # Row 1 is replaced by ownership shares: 100 for the Gov columns,
    # 0 for the NonGov columns, and the flagged share for the All columns.
    mean.gp.summaryA2[1, c(3, 6)] <- 100
    mean.lp.summaryA2[1, c(3, 6)] <- 100
    mean.gp.summaryA2[1, c(4, 7)] <- 0
    mean.lp.summaryA2[1, c(4, 7)] <- 0

    mean.gp.summaryA2[1, 2] <- length(which(gp.resp$govgp == 1)) / nrow(gp.resp) * 100
    mean.gp.summaryA2[1, 5] <- length(which(gp.nonresp$govgp == 1)) / nrow(gp.nonresp) * 100

    # LP shares pool LP rows with the GP rows that answered as LPs.
    mean.lp.summaryA2[1, 2] <- (length(which(lp.resp$govlp == 1)) + length(which(gplp$govgp == 1))) /
      (nrow(lp.resp) + nrow(gplp)) * 100
    mean.lp.summaryA2[1, 5] <- (length(which(lp.nonresp$govlp == 1)) + length(which(gplp1$govgp == 1))) /
      (nrow(lp.nonresp) + nrow(gplp1)) * 100

    # Row 3 of the GP panel reports medians instead of means.
    mean.gp.summaryA2[3, 2] <- irr_median(gp.resp$gp_irr)
    mean.gp.summaryA2[3, 3] <- irr_median(gp.resp$gp_irr[which(gp.resp$govgp == 1)])
    mean.gp.summaryA2[3, 4] <- irr_median(gp.resp$gp_irr[which(gp.resp$govgp == 0)])
    mean.gp.summaryA2[3, 5] <- irr_median(gp.nonresp$gp_irr)
    mean.gp.summaryA2[3, 6] <- irr_median(gp.nonresp$gp_irr[which(gp.nonresp$govgp == 1)])
    mean.gp.summaryA2[3, 7] <- irr_median(gp.nonresp$gp_irr[which(gp.nonresp$govgp == 0)])
  }
  
  
  
  {
    # Write Table A2 as two LaTeX panels.
    # Everything except the two finished summary tables is cleared first.
    rm(list = setdiff(ls(), c("mean.gp.summaryA2", "mean.lp.summaryA2")))

    # Panel A: LPs.
    tableA2_lp <- kbl(mean.lp.summaryA2, caption = 'LPs', digits = 2, booktabs = TRUE)
    tableA2_lp <- add_header_above(tableA2_lp, c(" ", "Respondents" = 3, "Non-Respondents" = 3))
    tableA2_lp <- kable_styling(tableA2_lp, latex_options = c("HOLD_position"))
    cat(tableA2_lp, file = 'Output\\appendix table\\TableA2_PanelA.tex')

    # Panel B: GPs.
    tableA2_gp <- kbl(mean.gp.summaryA2, caption = 'GPs', digits = 2, booktabs = TRUE)
    tableA2_gp <- add_header_above(tableA2_gp, c(" ", "Respondents" = 3, "Non-Respondents" = 3))
    tableA2_gp <- kable_styling(tableA2_gp, latex_options = c("HOLD_position"))
    cat(tableA2_gp, file = 'Output\\appendix table\\TableA2_PanelB.tex')
  }
  ## Table A5. LP Type Distribution ####
  # drop foreign
  {
    # Table A5: share of invested capital by LP type, for active LPs and for
    # survey respondents, each split into All / Gov / NonGov.
    rm(list = ls())
    lp <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta")
    gp <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta")
    # Only the GP rows flagged lp_respondent == 1 are needed.
    gplp <- gp[gp$lp_respondent == 1, ]
    rm(gp)

    # Collapse the detailed type labels into the reporting groups.
    lp$lptype_en[lp$lptype_en %in% "VC/PE Investment Institution"] <- "VC/PE"
    lp$lptype_en[lp$lptype_en %in% c("Government Guide Fund", "Government Bureau")] <-
      "Government Bureau and Guided Fund"
    lp$lptype_en[lp$lptype_en %in% ""] <- "Others"
    lp$lptype_en[lp$lptype_en %in% c("Bank", "Insurance Company")] <- "Bank and Insurance Company"
    lp$lptype_en[lp$lptype_en %in% c("Family Office", "University Fund")] <- "Others"
    gplp$gptype_en[gplp$gptype_en %in% c("PE", "VC")] <- "VC/PE"
    gplp$gptype_en[gplp$gptype_en %in% c("Strategic Investors", "Early Investment Firm")] <-
      "Investment Company"

    # Respondent sample: LP respondents stacked with the GP rows above.
    # Columns are picked by position and the GP columns are renamed to the LP
    # names so the two pieces can be row-bound.
    # NOTE(review): the positional indices assume a fixed column order in both
    # .dta files - confirm if the inputs change.
    gplp <- gplp[, c(1, 9, 3, 4, 11)]
    lp.resp <- lp[which(lp$lp_respondent == 1), c(1, 3, 2, 4, 8)]
    colnames(gplp) <- colnames(lp.resp)
    lp.resp <- rbind(lp.resp, gplp)
    rm(gplp)

    # Missing investment amounts in the active sample are set to zero
    # (done after lp.resp was extracted, so lp.resp keeps its NAs).
    lp$InvestmentAmountRMB[is.na(lp$InvestmentAmountRMB)] <- 0

    # Percentage of total invested capital accounted for by each LP type.
    # Returns two columns: lptype_en and share.
    type_share <- function(d) {
      d %>%
        group_by(lptype_en) %>%
        summarise(invest = sum(InvestmentAmountRMB, na.rm = TRUE)) %>%
        mutate(totalinvest = sum(invest, na.rm = TRUE)) %>%
        mutate(share = invest / totalinvest * 100) %>%
        select(lptype_en, share)
    }

    # Active sample splits use the govlp flag; respondent splits use whether
    # lp_total_gov_share is recorded.
    lptype.active <- type_share(lp)
    lptype.active.gov <- type_share(lp[lp$govlp == 1, ])
    lptype.active.nongov <- type_share(lp[lp$govlp == 0, ])
    lptype.resp <- type_share(lp.resp)
    lptype.resp.gov <- type_share(lp.resp[!is.na(lp.resp$lp_total_gov_share), ])
    lptype.resp.nongov <- type_share(lp.resp[is.na(lp.resp$lp_total_gov_share), ])

    # One row per LP type present in the active sample, six share columns.
    lptype <- lptype.active %>%
      left_join(lptype.active.gov, by = "lptype_en") %>%
      left_join(lptype.active.nongov, by = "lptype_en") %>%
      left_join(lptype.resp, by = "lptype_en") %>%
      left_join(lptype.resp.gov, by = "lptype_en") %>%
      left_join(lptype.resp.nongov, by = "lptype_en")
    colnames(lptype) <- c('', rep(c('All', 'Gov', 'NonGov'), 2))

    rm(list = setdiff(ls(), "lptype"))
    print(lptype)

    tableA5 <- kbl(lptype, caption = 'LP Type Distribution', digits = 2, booktabs = TRUE) %>%
      add_header_above(c(" ", "Active" = 3, "Respondent" = 3)) %>%
      kable_styling(latex_options = c("HOLD_position"))
    cat(tableA5, file = 'Output\\appendix table\\TableA5.tex')
  }
  ## Table A6. Summary Statistics by Government Level ####
  
  # adjust foreign entities
  {
    # Table A6: share of entities with central / provincial / local government
    # ownership, for active entities and for respondents.
    rm(list = ls())
    options(digits = 5)
    # row1 of LP and GP are same with table1
    load("Data\\Zero2IPO data used for analysis\\GPLP_Ownership.rdata")
    gp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta")
    lp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta")

    # All row / column filters below use logical masks instead of
    # `x[-which(cond), ]`: with a negative which(), a condition that matches
    # nothing yields `x[integer(0), ]` and silently drops every row (or column).

    # TRUE where a row should be kept, i.e. where `cond` is not TRUE.
    # NA conditions keep the row, which is what which() did.
    keep_unless <- function(cond) !(cond %in% TRUE)

    # Restrict the ownership tables to the entities in the summary files
    # (foreign entities excluded, per the file names).
    gpactiveown <- gpactiveown[gpactiveown$gp_fullname %in% gp1519$gp_fullname, ]
    lpactiveown.new <- lpactiveown.new[lpactiveown.new$lp_fullname %in% lp1519$lp_fullname, ]

    # Drop the hq_province_group column where present.
    gpactiveown <- gpactiveown[, colnames(gpactiveown) != "hq_province_group"]
    lpactiveown.new <- lpactiveown.new[, colnames(lpactiveown.new) != "hq_province_group"]
    lprespown.new <- lprespown.new[, colnames(lprespown.new) != "hq_province_group"]

    # Respondent LPs: keep names that are either LP respondents or GP rows
    # flagged as LP respondents.
    lp_respondent_names <- lp1519$lp_fullname[which(lp1519$lp_respondent == 1)]
    gp_as_lp_names <- gp1519$gp_fullname[which(gp1519$lp_respondent == 1)]
    lprespown.new <- lprespown.new[lprespown.new$lp_fullname %in% lp_respondent_names |
                                     lprespown.new$lp_fullname %in% gp_as_lp_names, ]

    # Recompute t_g_share_s as t_g_share minus t_g_share_a where there is any
    # government share, then floor it at zero.
    lp_active_gov <- lpactiveown.new$t_g_share > 0
    lpactiveown.new$t_g_share_s[lp_active_gov] <-
      lpactiveown.new$t_g_share[lp_active_gov] - lpactiveown.new$t_g_share_a[lp_active_gov]
    lpactiveown.new$t_g_share_s[lpactiveown.new$t_g_share_s <= 0] <- 0

    # Same steps for the respondent sample. The floor is applied after the
    # recomputation, as for the active sample; it previously ran before it,
    # so recomputed negative values were left in place.
    lp_resp_gov <- lprespown.new$t_g_share > 0
    lprespown.new$t_g_share_s[lp_resp_gov] <-
      lprespown.new$t_g_share[lp_resp_gov] - lprespown.new$t_g_share_a[lp_resp_gov]
    lprespown.new$t_g_share_s[lprespown.new$t_g_share_s <= 0] <- 0

    gpactiveown$t_g_share_s <- gpactiveown$t_g_share - gpactiveown$t_g_share_a

    # Remove entities flagged gov == 1 whose total government share is zero.
    gpactiveown <- gpactiveown[keep_unless(gpactiveown$gov == 1 & gpactiveown$t_g_share == 0), ]
    lpactiveown.new <- lpactiveown.new[keep_unless(lpactiveown.new$gov == 1 & lpactiveown.new$t_g_share == 0), ]
    lprespown.new <- lprespown.new[keep_unless(lprespown.new$gov == 1 & lprespown.new$t_g_share == 0), ]

    # Percentage of `n` accounted for by strictly positive entries of `x`.
    pct_positive <- function(x, n) length(which(x > 0)) / n * 100

    # One table row: share among all active, among active with any government
    # share, 0 for NonGov, then the same three for respondents.
    level_row <- function(active, n_active, n_active_gov, resp, n_resp, n_resp_gov) {
      c(pct_positive(active, n_active), pct_positive(active, n_active_gov), 0,
        pct_positive(resp, n_resp), pct_positive(resp, n_resp_gov), 0)
    }

    level_labels <- c('Share Government-Owned (%)', 'Share Central-Owned (%)',
                      'Share Provincial-Owned (%)', 'Share Local-Owned (%)')

    # ---- LP panel ----
    n_lp_active <- nrow(lpactiveown.new)
    n_lp_active_gov <- nrow(lpactiveown.new[lpactiveown.new$t_g_share > 0, ])
    n_lp_resp <- nrow(lprespown.new)
    n_lp_resp_gov <- nrow(lprespown.new[lprespown.new$t_g_share > 0, ])

    # The first row is hard-coded (see the "same with table1" note above).
    # rbind() is given calls rather than named objects so the matrix gets no
    # row names, which kbl() would otherwise print.
    govtypeshareLP1 <- rbind(
      c(50.11, 100, 0, 77.52, 100, 0),
      level_row(lpactiveown.new$c_g_share, n_lp_active, n_lp_active_gov,
                lprespown.new$c_g_share, n_lp_resp, n_lp_resp_gov),
      level_row(lpactiveown.new$p_g_share, n_lp_active, n_lp_active_gov,
                lprespown.new$p_g_share, n_lp_resp, n_lp_resp_gov),
      level_row(lpactiveown.new$l_g_share, n_lp_active, n_lp_active_gov,
                lprespown.new$l_g_share, n_lp_resp, n_lp_resp_gov))

    govtypeshareLP <- cbind(level_labels, as.data.frame(govtypeshareLP1))
    colnames(govtypeshareLP) <- c('', rep(c('All', 'Gov', 'NonGov'), 2))

    # ---- GP panel ----
    # Respondents are the gp_respondent == 1 rows of the active table.
    gp_is_resp <- gpactiveown$gp_respondent == 1
    n_gp_active <- nrow(gpactiveown)
    n_gp_active_gov <- nrow(gpactiveown[gpactiveown$t_g_share > 0, ])
    n_gp_resp <- nrow(gpactiveown[gp_is_resp, ])
    n_gp_resp_gov <- nrow(gpactiveown[gpactiveown$t_g_share > 0 & gp_is_resp, ])

    govtypeshareGP1 <- rbind(
      c(38.63, 100, 0, 32.05, 100, 0),
      level_row(gpactiveown$c_g_share, n_gp_active, n_gp_active_gov,
                gpactiveown$c_g_share[gp_is_resp], n_gp_resp, n_gp_resp_gov),
      level_row(gpactiveown$p_g_share, n_gp_active, n_gp_active_gov,
                gpactiveown$p_g_share[gp_is_resp], n_gp_resp, n_gp_resp_gov),
      level_row(gpactiveown$l_g_share, n_gp_active, n_gp_active_gov,
                gpactiveown$l_g_share[gp_is_resp], n_gp_resp, n_gp_resp_gov))

    govtypeshareGP <- cbind(level_labels, as.data.frame(govtypeshareGP1))
    colnames(govtypeshareGP) <- c('', rep(c('All', 'Gov', 'NonGov'), 2))

    # lp part
    tableA6_lp <- kbl(govtypeshareLP, caption = 'LP', digits = 2, booktabs = TRUE) %>%
      add_header_above(c(" ", "Active" = 3, "Respondent" = 3)) %>%
      kable_styling(latex_options = c("HOLD_position"))
    cat(tableA6_lp, file = 'Output\\appendix table\\TableA6_PanelA.tex')
    # gp part
    tableA6_gp <- kbl(govtypeshareGP, caption = 'GP', digits = 2, booktabs = TRUE) %>%
      add_header_above(c(" ", "Active" = 3, "Respondent" = 3)) %>%
      kable_styling(latex_options = c("HOLD_position"))
    cat(tableA6_gp, file = 'Output\\appendix table\\TableA6_PanelB.tex')
  }
## Table A39. Experimental Dislike for Government LPs and Stated Mechanisms ####
# Start from an empty workspace and load the packages used in this section.
rm(list = ls())
pacman::p_load(data.table,stargazer,tidyverse,stringr,readxl,haven,knitr,reshape2,cowplot,RColorBrewer,kableExtra)

# Qualitative survey answers and the GP survey dataset (provides `gpsurveyML`).
quali <- read_xlsx("Data\\2021 survey data analysis on pros and cons\\2021qualitativesurvey.xlsx")
load("Data\\Zero2IPO data used for analysis\\gpsurveyMLdataset.rdata")

set.seed(2022)

# Keep only observations with a recorded q2 (the outcome).
gpsurveyML <- gpsurveyML[which(!is.na(gpsurveyML$q2)), ]

# Covariates: every column except the positions listed here.
X.raw <- gpsurveyML[, -c(1:4, 6:21, 23:24, 27, 32, 36:37, 41:48)]
# GP identifier as a plain numeric vector.
gp.id <- as.numeric(gpsurveyML$gp_fe[1:length(gpsurveyML$gp_fe)])

# use soeshare68 and 4digit-level strategic industry (at the same time)
colnames(X.raw)

# X: covariates with the government share turned into a 0/1 indicator
# (1 when a share is recorded, 0 when it is missing).
X <- X.raw
X$gp_total_gov_share <- ifelse(is.na(X$gp_total_gov_share), 0, 1)

# W: the lp_relation_gov column, removed from X; Y: the q2 outcome.
W <- X$lp_relation_gov
X <- X[, -c(which(colnames(X) == "lp_relation_gov"))]
Y <- gpsurveyML$q2

# Coerce every remaining covariate to numeric.
X <- as.data.frame(
  sapply(X, function(col) as.numeric(col[1:length(col)]))
)

Y <- as.vector(Y)

################### baseline regressions ##################

# Per-GP OLS: for every GP with more than one observation, regress q2 on the
# raw covariates and keep the third coefficient as that GP's estimate.
# NOTE(review): the coefficient is taken by position (intercept first), so it
# depends on the column order of X.raw - presumably it is the lp_relation_gov
# term; confirm.
gp.levels <- unique(gp.id)
ate.values <- rep(NA_real_, length(gp.levels))  # GPs with a single row stay NA

# seq_along() keeps the loop from running over c(1, 0) when there are no GPs.
for (i in seq_along(gp.levels)) {
  gp.num <- gp.levels[i]
  subset.temp <- (as.numeric(gpsurveyML$gp_fe) == gp.num)
  if (length(which(subset.temp)) > 1) {
    Y.temp <- Y[subset.temp]
    X.raw.temp <- X.raw[subset.temp, ]
    # Same 0/1 recoding of the government share as for X.
    X.raw.temp$gp_total_gov_share <- ifelse(is.na(X.raw.temp$gp_total_gov_share), 0, 1)
    lm.temp <- lm(Y.temp ~ ., data = X.raw.temp)
    ate.values[i] <- lm.temp$coefficients[3]
  }
}

ate.by.gp.num <- data.table(gp.num = gp.levels, ate = ate.values)

# One row per distinct (gp_id, gp_fe) pair, used to attach gp_id to the
# per-GP estimates.
gp.key.pairs <- gpsurveyML[!duplicated(gpsurveyML[,c('gp_id','gp_fe')]),c('gp_id','gp_fe')]

# Estimates for all GPs: add gp_id, rank the estimates, attach columns 1 and
# 13:22 of the qualitative survey (natural join on the shared column names),
# and drop rows with any missing value.
ate.by.gp <- left_join(ate.by.gp.num, gp.key.pairs, by = c('gp.num' = 'gp_fe'))
ate.by.gp <- mutate(ate.by.gp, rank = rank(ate))
ate.by.gp <- left_join(ate.by.gp, quali[,c(1,13:22)])
ate.by.gp <- data.table(na.omit(ate.by.gp))

# Indicator column names: mainone1..mainone5 followed by maintwo1..maintwo5.
choice <- paste0(rep(c('mainone', 'maintwo'), each = 5), 1:5)
# Summarise one column of the global table `ate.by.gp` over the rows where an
# indicator column equals 1.
#
# choice: name of the indicator column (rows with value 1 are kept; NA rows
#         are dropped).
# stat:   name of the column to summarise.
# funct:  one of 'mean', 'median', 'obs' (number of rows) or 'sd'.
#
# Returns a single number. Stops with an explicit message when a column is
# missing or `funct` is not recognised. Columns are looked up by name rather
# than through eval(parse(text = ...)).
get.funct.stat = function(choice,stat,funct){
  missing.cols = setdiff(c(choice, stat), names(ate.by.gp))
  if(length(missing.cols) > 0){
    stop("Column(s) not found in ate.by.gp: ", paste(missing.cols, collapse = ", "), call. = FALSE)
  }
  selected = which(ate.by.gp[[choice]] == 1)
  values = ate.by.gp[[stat]][selected]
  switch(funct,
         mean = mean(values),
         median = median(values),
         obs = length(values),
         sd = sd(values),
         stop("Unknown funct '", funct, "': expected 'mean', 'median', 'obs' or 'sd'", call. = FALSE))
}

# Row labels, in the same order as `choice` (five advantages, five disadvantages).
mechanism.labels <- c('Adv 1. Regulatory Approvals and Tax Reductions',
                      'Adv 2. Reduce Fundraising Pressure',
                      'Adv 3. Access to Information',
                      'Adv 4. Obtain Local Government Support',
                      'Adv 5. Attract Potential Investors',
                      'Disadv 1. Investment Interference',
                      'Disadv 2. No Risk Tolerance',
                      'Disadv 3. Short Investment Horizon',
                      'Disadv 4. Lack of Professional Team',
                      'Disadv 5. Exposure to Policy Uncertainty')

# All GPs: median estimate among GPs that selected each mechanism.
# get.funct.stat() reads the global `ate.by.gp`, so that table is rebuilt
# before each of the three columns is computed.
avg.rank.by.choice.all.ols <- data.frame(Mechanisms = mechanism.labels,
                                         All = mapply(get.funct.stat, choice, 'ate', 'median'))

# Government indicator per GP (first value observed for each gp.id).
gov.nongov <- data.table(gp.id, gov = X$gp_total_gov_share)[, .(gov = gov[1]), by = gp.id]
gov.id <- gov.nongov[gov == 1, gp.id]
nongov.id <- gov.nongov[gov == 0, gp.id]

# Same construction as the all-GP table, restricted to the GP numbers in `ids`.
ate.table.for <- function(ids) {
  ate.by.gp.num %>%
    filter(gp.num %in% ids) %>%
    left_join(gpsurveyML[!duplicated(gpsurveyML[,c('gp_id','gp_fe')]),c('gp_id','gp_fe')],
              by = c('gp.num' = 'gp_fe')) %>%
    mutate(rank = rank(ate)) %>%
    left_join(quali[,c(1,13:22)]) %>%
    na.omit() %>%
    data.table()
}

## gov
ate.by.gp <- ate.table.for(gov.id)
avg.rank.by.choice.gov.ols <- data.frame(Gov_GP = mapply(get.funct.stat, choice, 'ate', 'median'))

## non-gov
ate.by.gp <- ate.table.for(nongov.id)
avg.rank.by.choice.nongov.ols <- data.frame(Non_gov_GP = mapply(get.funct.stat, choice, 'ate', 'median'))

## output tables
final <- cbind(avg.rank.by.choice.all.ols, avg.rank.by.choice.gov.ols, avg.rank.by.choice.nongov.ols)
tableA39 <- kbl(final, digits = 2, format = 'latex', booktabs = TRUE, row.names = FALSE, valign = 'H',
                caption = "Experimental Dislike for Government LPs and Stated Mechanisms")

cat(tableA39, file = 'Output\\appendix table\\TableA39.tex')
## Table A40. Summary Statistics with Gov defined as gov share >= 20%  ####
{
  # Reload the Zero2IPO inputs for Table A40.
  lp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipLP_2015-19_SummaryStats_exclude_foreign_entities.dta")
  gp1519 <- read_dta("Data\\Zero2IPO data used for analysis\\ActiveOwnershipGP_2015-19_SummaryStats_exclude_foreign_entities.dta")
  lp.industry <- read_dta("Data\\Zero2IPO data used for analysis\\lpindustry.dta")
  gp.industry <- read_dta("Data\\Zero2IPO data used for analysis\\gpindustry.dta")
  lp.region <- read_dta("Data\\Zero2IPO data used for analysis\\lpregion.dta")
  gp.region <- read_dta("Data\\Zero2IPO data used for analysis\\gpregion.dta")
}

{
  # Redefine the government flags with a 20% threshold: 1 when the total
  # government share is recorded and at least 20, otherwise 0 (a missing
  # share counts as non-government).
  lp1519$govlp <- ifelse(lp1519$lp_total_gov_share >= 20 & !is.na(lp1519$lp_total_gov_share), 1, 0)
  gp1519$govgp <- ifelse(gp1519$gp_total_gov_share >= 20 & !is.na(gp1519$gp_total_gov_share), 1, 0)
}

{
  # using gov >= 20 as gov
  gp.resp <- gp1519[gp1519$gp_respondent==1,]
  gp.gov.resp = gp.resp[gp.resp$govgp==1,]
  gp.nongov.resp = gp.resp[gp.resp$govgp==0,]
  gp.gov.active = gp1519[gp1519$govgp==1,]
  gp.nongov.active = gp1519[gp1519$govgp==0,]
  
  mean.noNA = function(x){
    mean(x,na.rm = TRUE)
  }
  mean.gp.all.resp = apply(cbind(gp.resp$gp_total_gov_share,
                                 Winsorize(gp.resp$AUMrmb, probs = c(0,0.95), na.rm = T),
                                 Winsorize(gp.resp$gp_irr, probs = c(0,0.95), na.rm = T),
                                 gp.resp$Num_mgt_fund,
                                 gp.resp$Number_investment,gp.resp$Number_exits),2,mean.noNA)
  mean.gp.gov.resp = apply(cbind(gp.gov.resp$gp_total_gov_share,
                                 Winsorize(gp.gov.resp$AUMrmb, probs = c(0,0.95), na.rm = T),
                                 Winsorize(gp.gov.resp$gp_irr, probs = c(0,0.95), na.rm = T),
                                 gp.gov.resp$Num_mgt_fund,
                                 gp.gov.resp$Number_investment,
                                 gp.gov.resp$Number_exits),2,mean.noNA)
  mean.gp.nongov.resp = apply(cbind(gp.nongov.resp$gp_total_gov_share,
                                    Winsorize(gp.nongov.resp$AUMrmb, probs = c(0,0.95), na.rm = T),
                                    Winsorize(gp.nongov.resp$gp_irr, probs = c(0,0.95), na.rm = T),
                                    gp.nongov.resp$Num_mgt_fund,
                                    gp.nongov.resp$Number_investment,gp.nongov.resp$Number_exits),2,mean.noNA)
  mean.gp.all.active = apply(cbind(gp1519$gp_total_gov_share,
                                   Winsorize(gp1519$AUMrmb, probs = c(0,0.95), na.rm = T) ,
                                   Winsorize(gp1519$gp_irr, probs = c(0,0.95), na.rm = T),
                                   gp1519$Num_mgt_fund,
                                   gp1519$Number_investment,gp1519$Number_exits),2,mean.noNA)
  mean.gp.gov.active = apply(cbind(gp.gov.active$gp_total_gov_share,
                                   Winsorize(gp.gov.active$AUMrmb, probs = c(0,0.95), na.rm = T) ,
                                   Winsorize(gp.gov.active$gp_irr, probs = c(0,0.95), na.rm = T),
                                   gp.gov.active$Num_mgt_fund,
                                   gp.gov.active$Number_investment,gp.gov.active$Number_exits),2,mean.noNA)
  mean.gp.nongov.active = apply(cbind(gp.nongov.active$gp_total_gov_share,
                                      Winsorize(gp.nongov.active$AUMrmb, probs = c(0,0.95), na.rm = T)  ,
                                      Winsorize(gp.nongov.active$gp_irr, probs = c(0,0.95), na.rm = T),
                                      gp.nongov.active$Num_mgt_fund,
                                      gp.nongov.active$Number_investment,gp.nongov.active$Number_exits),2,mean.noNA)
  mean.gp.summary = cbind(mean.gp.all.active,mean.gp.gov.active,mean.gp.nongov.active,
                          mean.gp.all.resp,mean.gp.gov.resp,mean.gp.nongov.resp)
  mean.gp.summary = cbind(c('Gov Share (%)','AUM','IRR (%)','Funds Num.','Investment Num.','Exit Num.'),
                          as.data.frame(mean.gp.summary))
  colnames(mean.gp.summary) = c('',rep(c('All','Gov','NonGov'),2))
  
  # Build the LP samples and the LP half of the summary table.
  #
  # Rows are selected with which() rather than a bare logical comparison:
  # `df[df$x == 1, ]` inserts an all-NA row for every NA in `df$x`, which
  # would inflate nrow(lp.resp) / nrow(gplp) used as denominators further down.
  # NOTE(review): assumes govlp / govgp are coded 0/1 at this point (the
  # Figure 1 section recodes them to "Yes"/"No") -- confirm the data are
  # re-read before this block.
  lp.resp <- lp1519[which(lp1519$lp_respondent == 1), ]
  gplp <- gp1519[which(gp1519$lp_respondent == 1), ]
  lp.gov.resp <- lp.resp[which(lp.resp$govlp == 1), ]
  lp.nongov.resp <- lp.resp[which(lp.resp$govlp == 0), ]
  # gplp.gov / gplp.nongov are not referenced again before the workspace is
  # cleared in the output step; kept for interactive inspection.
  gplp.gov <- gplp[which(gplp$govgp == 1), ]
  gplp.nongov <- gplp[which(gplp$govgp == 0), ]
  lp.gov.active <- lp1519[which(lp1519$govlp == 1), ]
  lp.nongov.active <- lp1519[which(lp1519$govlp == 0), ]

  # Per-variable summary (via mean.noNA) of the three LP variables for one
  # sample `lp`: government share, total amount invested in funds (winsorized
  # with probs = c(0, 0.95), i.e. upper tail only) and number of fund links.
  # The cbind() arguments are unnamed expressions so the result is an unnamed
  # vector and the final table does not pick up row names.
  lp_column_means <- function(lp) {
    lp_vars <- cbind(
      lp$lp_total_gov_share,
      Winsorize(lp$Total_invest_in_fund, probs = c(0, 0.95), na.rm = TRUE),
      lp$Num_of_fund_link
    )
    apply(lp_vars, 2, mean.noNA)
  }

  mean.lp.all.resp <- lp_column_means(lp.resp)
  mean.lp.gov.resp <- lp_column_means(lp.gov.resp)
  mean.lp.nongov.resp <- lp_column_means(lp.nongov.resp)
  mean.lp.all.active <- lp_column_means(lp1519)
  mean.lp.gov.active <- lp_column_means(lp.gov.active)
  mean.lp.nongov.active <- lp_column_means(lp.nongov.active)

  # Assemble the LP table: three "active" columns followed by three
  # "respondent" columns, preceded by a column of row labels with an empty
  # header.
  lp_sample_means <- as.data.frame(cbind(
    mean.lp.all.active, mean.lp.gov.active, mean.lp.nongov.active,
    mean.lp.all.resp, mean.lp.gov.resp, mean.lp.nongov.resp
  ))
  lp_row_labels <- c(
    "Gov Share (%)", "Total Amt. Invested in Funds", "Total Fund Links"
  )
  mean.lp.summary <- cbind(lp_row_labels, lp_sample_means)
  colnames(mean.lp.summary) <- c("", rep(c("All", "Gov", "NonGov"), times = 2))
  
  # Row 1 is the government-share row. Within the Gov columns (3, 6) the share
  # is set to 100 and within the NonGov columns (4, 7) to 0, overwriting the
  # values computed above.
  mean.gp.summary[1, c(3, 6)] <- 100
  mean.lp.summary[1, c(3, 6)] <- 100
  mean.gp.summary[1, c(4, 7)] <- 0
  mean.lp.summary[1, c(4, 7)] <- 0

  # For the "All" columns (2 = active, 5 = respondent) the government share is
  # replaced by the percentage of entities flagged as government. NA flags
  # count in the denominator but never in the numerator.
  n_gov_gp_active <- sum(gp1519$govgp == 1, na.rm = TRUE)
  n_gov_gp_resp <- sum(gp.resp$govgp == 1, na.rm = TRUE)
  mean.gp.summary[1, 2] <- n_gov_gp_active / nrow(gp1519) * 100
  mean.gp.summary[1, 5] <- n_gov_gp_resp / nrow(gp.resp) * 100

  n_gov_lp_active <- sum(lp1519$govlp == 1, na.rm = TRUE)
  mean.lp.summary[1, 2] <- n_gov_lp_active / nrow(lp1519) * 100
  # The LP respondent share pools `lp.resp` with `gplp` (rows of gp1519 with
  # lp_respondent == 1).
  n_gov_lp_resp <- sum(lp.resp$govlp == 1, na.rm = TRUE) +
    sum(gplp$govgp == 1, na.rm = TRUE)
  mean.lp.summary[1, 5] <- n_gov_lp_resp / (nrow(lp.resp) + nrow(gplp)) * 100

  # Row 3 (IRR) is overwritten with the *median* of the winsorized IRR for
  # each sample, although the table captions describe the entries as means.
  irr_median <- function(irr) {
    median(Winsorize(irr, probs = c(0, 0.95), na.rm = TRUE), na.rm = TRUE)
  }
  mean.gp.summary[3, 2] <- irr_median(gp1519$gp_irr)
  mean.gp.summary[3, 3] <- irr_median(gp1519$gp_irr[which(gp1519$govgp == 1)])
  mean.gp.summary[3, 4] <- irr_median(gp1519$gp_irr[which(gp1519$govgp == 0)])
  mean.gp.summary[3, 5] <- irr_median(gp.resp$gp_irr)
  mean.gp.summary[3, 6] <- irr_median(gp.resp$gp_irr[which(gp.resp$govgp == 1)])
  mean.gp.summary[3, 7] <- irr_median(gp.resp$gp_irr[which(gp.resp$govgp == 0)])
}

{# output table
  # Clear the workspace except for the two summary tables written out below.
  rm(list = setdiff(ls(), c("mean.gp.summary", "mean.lp.summary")))

  # Panel A: LPs. Render the table as a kable, add the Active / Respondent
  # group header, pin its float position and attach the sample-size footnote
  # (the counts in the footnote are hard-coded, not computed from the data).
  tableA40_lp <- kbl(
    mean.lp.summary,
    caption = "Mean of key variables for LPs",
    digits = 2,
    booktabs = TRUE
  )
  tableA40_lp <- add_header_above(
    tableA40_lp,
    c(" ", "Active" = 3, "Respondent" = 3)
  )
  tableA40_lp <- kable_styling(tableA40_lp, latex_options = c("HOLD_position"))
  tableA40_lp <- footnote(
    tableA40_lp,
    general = "We have 312 LP Respondents and 7974 ActiveOwnership LPs.",
    threeparttable = TRUE
  )
  cat(tableA40_lp, file = "Output\\appendix table\\TableA40_PanelA.tex")

  # Panel B: GPs. Same layout as Panel A.
  tableA40_gp <- kbl(
    mean.gp.summary,
    caption = "Mean of key variables for GPs",
    digits = 2,
    booktabs = TRUE
  )
  tableA40_gp <- add_header_above(
    tableA40_gp,
    c(" ", "Active" = 3, "Respondent" = 3)
  )
  tableA40_gp <- kable_styling(tableA40_gp, latex_options = c("HOLD_position"))
  tableA40_gp <- footnote(
    tableA40_gp,
    general = "We have 688 GP Respondents and 6308 ActiveOwnership GPs.",
    threeparttable = TRUE
  )
  cat(tableA40_gp, file = "Output\\appendix table\\TableA40_PanelB.tex")
}
